Exemplo n.º 1
0
def test_etcd_master_recovery(startcredis_etcdonly):
    """ Test that the master can recover its state from etcd.

    Starts a 3-node chain, records the chain, head and tail reported by the
    master, then kills and restarts the master and checks that it reports the
    same values. Finally adds a node to confirm normal operation continues.
    """
    common.Start(
        chain=common.MakeChain(3),
        master_mode=MASTER_ETCD,
        heartbeat_interval=1,
        heartbeat_timeout=10)

    # Snapshot the master's view before it is killed.
    chain = master_client.execute_command('MASTER.GET_CHAIN')
    head = master_client.execute_command('MASTER.REFRESH_HEAD')
    tail = master_client.execute_command('MASTER.REFRESH_TAIL')
    assert len(chain) == 3, 'chain %s' % chain

    common.KillMaster()
    time.sleep(0.2)
    common.StartMaster(master_mode=MASTER_ETCD)
    time.sleep(0.1)

    # The restarted master must report exactly the state seen before the kill.
    assert chain == master_client.execute_command('MASTER.GET_CHAIN')
    assert head == master_client.execute_command('MASTER.REFRESH_HEAD')
    assert tail == master_client.execute_command('MASTER.REFRESH_TAIL')

    # Pass master_mode explicitly, as the other etcd tests in this file do when
    # adding a node.
    new_node, _ = common.AddNode(master_client, master_mode=MASTER_ETCD)
    try:
        # Sanity check that normal operation can continue.
        assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 4
    finally:
        # Kill the extra node even when the assertion fails, so that it does
        # not outlive this test.
        new_node.kill()
Exemplo n.º 2
0
def test_etcd_heartbeat_timeout(startcredis_etcdonly):
    """ Test that failure is detected and repaired within a heartbeat timeout.

    The node at index 1 of a 3-node chain is killed without notifying the
    master, and every put issued by the driver must still complete.
    """
    # Start members with a quick heartbeat timeout.
    common.Start(
        chain=common.MakeChain(3),
        master_mode=MASTER_ETCD,
        heartbeat_interval=1,
        heartbeat_timeout=2)

    # Launch driver process. Note that it will take a minimum of 10 seconds.
    n = 10
    sleep_secs = 1
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()

    time.sleep(0.1)
    common.KillNode(index=1)  # Don't notify master
    # Heartbeat should expire within 2 sec.
    driver.join()

    assert ops_completed.value == n

    chain = master_client.execute_command('MASTER.GET_CHAIN')
    # 3 initial nodes minus the one that was killed.
    assert len(chain) == 2, 'chain %s' % chain
    Check(ops_completed.value)
Exemplo n.º 3
0
def test_etcd_kill_middle(startcredis_etcdonly):
    """ Check that puts keep completing after the middle node is removed.

    The node at index 1 of a 3-node chain is killed with the master notified;
    the driver must still complete all of its puts and the master must then
    report a 2-node chain.
    """
    num_puts = 100
    pause_secs = 0.1

    # Members are started with a short heartbeat timeout.
    initial_chain = common.MakeChain(3)
    common.Start(chain=initial_chain, master_mode=MASTER_ETCD,
                 heartbeat_interval=1, heartbeat_timeout=2)

    # Run the puts in a separate process so the kill happens mid-stream.
    writer = multiprocessing.Process(target=SeqPut, args=(num_puts, pause_secs))
    writer.start()

    time.sleep(0.1)
    middle_port = common.PortForNode(1)  # Looked up but not used below.
    common.KillNode(index=1, notify=master_client)
    writer.join()

    assert ops_completed.value == num_puts
    remaining = master_client.execute_command('MASTER.GET_CHAIN')
    assert len(remaining) == 2, 'chain %s' % remaining
    Check(ops_completed.value)
Exemplo n.º 4
0
def BenchVanillaRedis(num_ops):
    """ Benchmark sequential SET commands against a single chain node.

    Starts a 1-node chain, issues `num_ops` SET commands through an AckClient
    and logs the observed throughput and mean per-op latency.

    Args:
        num_ops: Number of SET commands to issue. When nothing can be measured
            (e.g. 0), the summary is still logged, with throughput 0.0 and a
            mean latency of nan, instead of raising ZeroDivisionError.
    """
    # Function-local import so this block does not depend on the file's import
    # list. default_timer is the stdlib's benchmark clock (perf_counter on
    # Python 3), which is better suited to measuring intervals than time.time().
    from timeit import default_timer

    common.Start(chain=common.MakeChain(1))
    time.sleep(0.1)
    r = AckClient()  # Just use the chain node as a regular redis server.

    start = default_timer()
    for i in range(num_ops):
        i_str = str(i)  # Serialize once.
        r.execute_command('SET', i_str, i_str)
    total_secs = default_timer() - start

    if num_ops > 0 and total_secs > 0:
        throughput = num_ops * 1.0 / total_secs
        mean_latency_us = total_secs * 1e6 / num_ops
    else:
        # Nothing measurable; avoid dividing by zero.
        throughput = 0.0
        mean_latency_us = float('nan')
    common.log('throughput %.1f writes/sec; latency (us): mean %.5f std ? num %d' %
               (throughput, mean_latency_us, num_ops))
Exemplo n.º 5
0
def BenchCredis(num_nodes, num_ops, num_clients, master_mode):
    """ Run SeqPut drivers against a freshly started chain and check the result.

    Starts a chain of `num_nodes` nodes in `master_mode`, runs one SeqPut
    process per client with `num_ops` operations and no sleep between them,
    then asserts that `ops_completed` equals `num_ops` and calls Check.

    Only `num_clients == 1` is currently accepted (see the TODO below).
    """
    common.Start(chain=common.MakeChain(num_nodes), master_mode=master_mode)
    time.sleep(0.1)

    # TODO(zongheng): ops_completed needs to be changed
    assert num_clients == 1

    workers = [
        multiprocessing.Process(target=SeqPut, args=(num_ops, 0))
        for _ in range(num_clients)
    ]
    # Start every worker before waiting on any of them.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    assert ops_completed.value == num_ops
    Check(ops_completed.value)
Exemplo n.º 6
0
def test_etcd_master_online_recovery(startcredis_etcdonly):
    """ Test that SeqPut succeeds when the master is killed and restarted mid-way, then a member is
    killed, then a member is added. The restarted master should be able to recover the chain, with
    the new member being the tail, and no updates should be lost.
    """
    common.Start(
        chain=common.MakeChain(3),
        master_mode=MASTER_ETCD,
        heartbeat_interval=1,
        heartbeat_timeout=10)

    # Launch driver process. Note that it will take a minimum of 10 seconds.
    n = 10
    sleep_secs = 1
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()

    # Restart the master while the driver is still issuing puts.
    time.sleep(0.1)
    common.KillMaster()
    common.StartMaster(master_mode=MASTER_ETCD)
    time.sleep(0.1)
    assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 3

    # Kill the node at index 1 and tell the master about it.
    time.sleep(0.1)
    common.KillNode(index=1, notify=master_client)
    assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 2

    new_node, _ = common.AddNode(master_client, master_mode=MASTER_ETCD)
    try:
        time.sleep(0.1)
        # Wait for the driver to finish before the final checks.
        driver.join()
        assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 3

        assert ops_completed.value == n
        Check(ops_completed.value)
    finally:
        # Cleanup, even when one of the assertions above fails.
        new_node.kill()
Exemplo n.º 7
0
def test_etcd_kill_node_while_master_is_dead(startcredis_etcdonly):
    """ Test that SeqPut succeeds when the master is killed and a node is killed WHILE the master is
    dead. The master is then restarted. No updates should be lost.

    TODO: Fails (3/28) because members are not checked for liveness when the master starts up.
    """
    # Choose a long heartbeat timeout so that the master never receives heartbeat expiry notifs.
    common.Start(
        chain=common.MakeChain(3),
        master_mode=MASTER_ETCD,
        heartbeat_interval=1,
        heartbeat_timeout=999)

    # Launch driver process. Note that it will take a minimum of 10 seconds.
    n = 10
    sleep_secs = 1
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()

    time.sleep(0.1)
    common.KillMaster()
    # No notify argument: the node dies while no master is running.
    common.KillNode(index=1)
    common.StartMaster(master_mode=MASTER_ETCD)
    time.sleep(0.2)
    assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 2

    new_node, _ = common.AddNode(master_client, master_mode=MASTER_ETCD)
    try:
        time.sleep(0.1)
        assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 3

        driver.join()

        assert ops_completed.value == n
        Check(ops_completed.value)
    finally:
        # Cleanup, even when one of the assertions above fails.
        new_node.kill()