def test_etcd_master_recovery(startcredis_etcdonly):
    """Test that the master can recover its state from etcd.

    Records the chain, head and tail reported by the master, kills and
    restarts the master in etcd mode, and checks that the restarted master
    reports the same values.  A node is then added as a sanity check that
    normal operation can continue.
    """
    common.Start(
        chain=common.MakeChain(3),
        master_mode=MASTER_ETCD,
        heartbeat_interval=1,
        heartbeat_timeout=10)

    # State reported before the master is killed.
    chain = master_client.execute_command('MASTER.GET_CHAIN')
    head = master_client.execute_command('MASTER.REFRESH_HEAD')
    tail = master_client.execute_command('MASTER.REFRESH_TAIL')
    assert len(chain) == 3, 'chain %s' % chain

    common.KillMaster()
    time.sleep(0.2)
    common.StartMaster(master_mode=MASTER_ETCD)
    time.sleep(0.1)

    # The restarted master must report exactly the pre-kill state.
    assert chain == master_client.execute_command('MASTER.GET_CHAIN')
    assert head == master_client.execute_command('MASTER.REFRESH_HEAD')
    assert tail == master_client.execute_command('MASTER.REFRESH_TAIL')

    # Sanity check that normal operation can continue.
    new_node, _ = common.AddNode(master_client)
    try:
        assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 4
    finally:
        # Kill the extra node even when the assertion fails, so a failing
        # run does not leave it behind.
        new_node.kill()
def test_etcd_heartbeat_timeout(startcredis_etcdonly):
    """Test that failure is detected and repaired within a heartbeat timeout.

    The middle node is killed without notifying the master, so the chain
    can only shrink to two nodes if the master notices the failure itself.
    """
    # Start members with a quick heartbeat timeout.
    common.Start(
        chain=common.MakeChain(3),
        master_mode=MASTER_ETCD,
        heartbeat_interval=1,
        heartbeat_timeout=2)

    # Launch driver process.  Note that it will take a minimum of 10 seconds.
    n = 10
    sleep_secs = 1
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()
    time.sleep(0.1)

    # NOTE(review): middle_port is not used below; kept in case the lookup
    # matters -- confirm and drop if PortForNode is a pure getter.
    middle_port = common.PortForNode(1)
    common.KillNode(index=1)  # Don't notify master
    # Heartbeat should expire within 2 sec.
    driver.join()

    assert ops_completed.value == n
    # No debugger breakpoint here: the test must run unattended.
    chain = master_client.execute_command('MASTER.GET_CHAIN')
    assert len(chain) == 2, 'chain %s' % chain
    Check(ops_completed.value)
def test_etcd_kill_middle(startcredis_etcdonly):
    """Test that writes keep completing after the middle node is removed.

    A three-node chain is started, a writer process issues sequential puts,
    and the middle node is killed with the master notified.  All puts must
    complete and the master must report a two-node chain afterwards.
    """
    # Start members with a quick heartbeat timeout.
    three_node_chain = common.MakeChain(3)
    common.Start(
        chain=three_node_chain,
        master_mode=MASTER_ETCD,
        heartbeat_interval=1,
        heartbeat_timeout=2)

    # Launch the writer process.
    total_puts = 100
    pause_between_puts = 0.1
    writer = multiprocessing.Process(
        target=SeqPut, args=(total_puts, pause_between_puts))
    writer.start()
    time.sleep(0.1)

    # Remove the middle node and tell the master about it.
    middle_port = common.PortForNode(1)
    common.KillNode(index=1, notify=master_client)
    writer.join()

    assert ops_completed.value == total_puts
    remaining = master_client.execute_command('MASTER.GET_CHAIN')
    assert len(remaining) == 2, 'chain %s' % remaining
    Check(ops_completed.value)
def BenchVanillaRedis(num_ops):
    """Time `num_ops` sequential SET commands against a one-node chain.

    The single chain node is used as a regular redis server.  Logs the
    write throughput and the mean per-write latency in microseconds.
    """
    common.Start(chain=common.MakeChain(1))
    time.sleep(0.1)

    # Just use the chain node as a regular redis server.
    client = AckClient()

    began = time.time()
    for op in range(num_ops):
        payload = str(op)  # Serialize once; used as both key and value.
        client.execute_command('SET', payload, payload)
    elapsed = time.time() - began

    writes_per_sec = num_ops * 1.0 / elapsed
    mean_latency_us = elapsed * 1e6 / num_ops
    common.log(
        'throughput %.1f writes/sec; latency (us): mean %.5f std ? num %d' %
        (writes_per_sec, mean_latency_us, num_ops))
def BenchCredis(num_nodes, num_ops, num_clients, master_mode):
    """Run `num_ops` back-to-back sequential puts on a `num_nodes` chain.

    Starts the chain in the given master mode, runs one SeqPut process per
    client with no delay between puts, then checks that every op completed.
    Only a single client is supported for now.
    """
    common.Start(chain=common.MakeChain(num_nodes), master_mode=master_mode)
    time.sleep(0.1)

    # TODO(zongheng): ops_completed needs to be changed
    assert num_clients == 1

    workers = [
        multiprocessing.Process(target=SeqPut, args=(num_ops, 0))
        for _ in range(num_clients)
    ]
    # Start every worker before waiting on any of them.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    assert ops_completed.value == num_ops
    Check(ops_completed.value)
def test_etcd_master_online_recovery(startcredis_etcdonly):
    """Test that SeqPut succeeds across a master restart and chain changes.

    The master is killed and restarted mid-way, then a member is killed,
    then a member is added.  The restarted master should be able to recover
    the chain, with the new member being the tail, and no updates should be
    lost.
    """
    common.Start(
        chain=common.MakeChain(3),
        master_mode=MASTER_ETCD,
        heartbeat_interval=1,
        heartbeat_timeout=10)

    # Launch driver process.  Note that it will take a minimum of 10 seconds.
    n = 10
    sleep_secs = 1
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()
    time.sleep(0.1)

    # Restart the master while the driver is still writing.
    common.KillMaster()
    common.StartMaster(master_mode=MASTER_ETCD)
    time.sleep(0.1)
    assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 3
    time.sleep(0.1)

    # Remove the middle member, notifying the restarted master.
    # NOTE(review): middle_port is not used below; kept in case the lookup
    # matters -- confirm and drop if PortForNode is a pure getter.
    middle_port = common.PortForNode(1)
    common.KillNode(index=1, notify=master_client)
    assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 2

    new_node, _ = common.AddNode(master_client, master_mode=MASTER_ETCD)
    try:
        time.sleep(0.1)
        # A single join suffices; joining an already-finished process again
        # would be a no-op.
        driver.join()
        assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 3

        assert ops_completed.value == n
        Check(ops_completed.value)
    finally:
        # Cleanup, also when an assertion above fails.
        new_node.kill()
def test_etcd_kill_node_while_master_is_dead(startcredis_etcdonly):
    """Test that SeqPut succeeds when a node dies WHILE the master is dead.

    The master is killed, a node is killed while the master is down, and
    the master is then restarted.  No updates should be lost.

    TODO: Fails (3/28) because members are not checked for liveness when the
    master starts up.
    """
    # Choose a long heartbeat timeout so that the master never receives
    # heartbeat expiry notifs.
    common.Start(
        chain=common.MakeChain(3),
        master_mode=MASTER_ETCD,
        heartbeat_interval=1,
        heartbeat_timeout=999)

    # Launch driver process.  Note that it will take a minimum of 10 seconds.
    n = 10
    sleep_secs = 1
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()
    time.sleep(0.1)

    # Kill the node while no master is running, then bring the master back.
    common.KillMaster()
    common.KillNode(index=1)
    common.StartMaster(master_mode=MASTER_ETCD)
    time.sleep(0.2)
    assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 2

    new_node, _ = common.AddNode(master_client, master_mode=MASTER_ETCD)
    try:
        time.sleep(0.1)
        assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 3

        driver.join()

        assert ops_completed.value == n
        Check(ops_completed.value)
    finally:
        # Cleanup, also when an assertion above fails.
        new_node.kill()