示例#1
0
def test_etcd_heartbeat_timeout(startcredis_etcdonly):
    """ Test that failure is detected and repaired within a heartbeat timeout.

    A 3-node chain is started with a 2-second heartbeat timeout, a driver
    process issues `n` sequential puts, and node 1 is killed without passing
    a `notify` client to `KillNode`.  The test then checks that every put
    completed and that the master reports a 2-member chain.
    """
    # Start members with a quick heartbeat timeout.
    common.Start(
        chain=common.MakeChain(3),
        master_mode=MASTER_ETCD,
        heartbeat_interval=1,
        heartbeat_timeout=2)

    # Launch driver process. Note that it will take a minimum of 10 seconds.
    n = 10
    sleep_secs = 1
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()

    # Presumably gives the driver time to begin writing before the kill.
    time.sleep(0.1)
    common.KillNode(index=1)  # Don't notify master
    # Heartbeat should expire within 2 sec.
    driver.join()

    assert ops_completed.value == n

    # No debugger breakpoint here: it would hang or abort unattended runs.
    chain = master_client.execute_command('MASTER.GET_CHAIN')
    # Three members were started and one was killed, so two must remain.
    assert len(chain) == 2, 'chain %s' % chain
    Check(ops_completed.value)
示例#2
0
def test_etcd_master_recovery(startcredis_etcdonly):
    """ Test that the master can recover its state from etcd.

    Records the chain, head and tail reported by the master, kills and
    restarts the master in etcd mode, and checks that the restarted master
    reports the same three values.  A node is then added to confirm that
    the chain can still grow.
    """
    common.Start(
        chain=common.MakeChain(3),
        master_mode=MASTER_ETCD,
        heartbeat_interval=1,
        heartbeat_timeout=10)

    chain = master_client.execute_command('MASTER.GET_CHAIN')
    head = master_client.execute_command('MASTER.REFRESH_HEAD')
    tail = master_client.execute_command('MASTER.REFRESH_TAIL')
    assert len(chain) == 3, 'chain %s' % chain

    common.KillMaster()
    time.sleep(0.2)
    common.StartMaster(master_mode=MASTER_ETCD)
    time.sleep(0.1)

    # The restarted master must report exactly the pre-restart state.
    assert chain == master_client.execute_command('MASTER.GET_CHAIN')
    assert head == master_client.execute_command('MASTER.REFRESH_HEAD')
    assert tail == master_client.execute_command('MASTER.REFRESH_TAIL')

    # NOTE(review): the other etcd tests pass master_mode=MASTER_ETCD to
    # AddNode; confirm whether omitting it here is intentional.
    new_node, _ = common.AddNode(master_client)
    try:
        # Sanity check that normal operation can continue.
        assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 4
    finally:
        # Always stop the extra node, even when the check above fails, so it
        # does not outlive this test.
        new_node.kill()
示例#3
0
def test_etcd_kill_middle(startcredis_etcdonly):
    """ Test that if the middle node is removed, the tail continues to get updates
    once the chain is repaired.

    Node 1 of a 3-node chain is killed while a driver process issues `n`
    sequential puts; the master client is passed to `KillNode` as `notify`.
    The test checks that every put completed and that the master reports a
    2-member chain.
    """
    # Start members with a quick heartbeat timeout.
    common.Start(
        chain=common.MakeChain(3),
        master_mode=MASTER_ETCD,
        heartbeat_interval=1,
        heartbeat_timeout=2)

    # Launch driver process.
    n = 100
    sleep_secs = 0.1
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()

    # Presumably gives the driver time to begin writing before the kill.
    time.sleep(0.1)
    common.KillNode(index=1, notify=master_client)
    driver.join()

    assert ops_completed.value == n
    chain = master_client.execute_command('MASTER.GET_CHAIN')
    # Three members were started and one was killed, so two must remain.
    assert len(chain) == 2, 'chain %s' % chain
    Check(ops_completed.value)
示例#4
0
    def testCkptOnly(self):
        """With GCS_CKPTONLY, TAIL.CHECKPOINT is accepted but HEAD.FLUSH is rejected."""
        common.Start(gcs_mode=common.GCS_CKPTONLY)
        # Expected to succeed; a ResponseError here would fail the test.
        self.ack_client.execute_command('TAIL.CHECKPOINT')

        with self.assertRaises(redis.exceptions.ResponseError) as ctx:
            self.head_client.execute_command('HEAD.FLUSH')
        # assertIn shows both the expected fragment and the actual message on
        # failure, which assertTrue(x in y) does not.
        self.assertIn('GcsMode is NOT set to kCkptFlush', str(ctx.exception))
示例#5
0
    def testNormal(self):
        """With the default mode, both TAIL.CHECKPOINT and HEAD.FLUSH are rejected."""
        common.Start()
        # By default, the execution mode is kNormal, which disallows flush/ckpt.
        with self.assertRaises(redis.exceptions.ResponseError) as ctx:
            self.ack_client.execute_command('TAIL.CHECKPOINT')
        # assertIn shows both the expected fragment and the actual message on
        # failure, which assertTrue(x in y) does not.
        self.assertIn('GcsMode is set to kNormal', str(ctx.exception))

        with self.assertRaises(redis.exceptions.ResponseError) as ctx:
            self.head_client.execute_command('HEAD.FLUSH')
        self.assertIn('GcsMode is NOT set to kCkptFlush', str(ctx.exception))
示例#6
0
def BenchVanillaRedis(num_ops):
    """Time `num_ops` sequential SET commands against a one-node chain.

    Starts a single-node chain, obtains a client from `AckClient()`, writes
    the keys "0" .. str(num_ops - 1) (each mapped to itself) one command at
    a time, and logs the measured throughput and mean per-write latency.
    """
    common.Start(chain=common.MakeChain(1))
    time.sleep(0.1)
    # Just use the chain node as a regular redis server.
    client = AckClient()

    began = time.time()
    for op in range(num_ops):
        # Serialize once; the same string is used as both key and value.
        key = str(op)
        client.execute_command('SET', key, key)
    elapsed = time.time() - began

    writes_per_sec = num_ops * 1.0 / elapsed
    mean_latency_us = elapsed * 1e6 / num_ops
    common.log('throughput %.1f writes/sec; latency (us): mean %.5f std ? num %d' %
               (writes_per_sec, mean_latency_us, num_ops))
示例#7
0
def BenchCredis(num_nodes, num_ops, num_clients, master_mode):
    """Run `SeqPut` drivers against a `num_nodes`-long chain and verify the result.

    Starts the chain with the given `master_mode`, runs one `SeqPut` process
    per client with a sleep argument of 0, waits for all of them, then
    asserts that `num_ops` operations completed and runs `Check` on that
    count.  Only `num_clients == 1` is currently accepted.
    """
    common.Start(chain=common.MakeChain(num_nodes), master_mode=master_mode)
    time.sleep(0.1)

    # TODO(zongheng): ops_completed needs to be changed
    assert num_clients == 1

    workers = [
        multiprocessing.Process(target=SeqPut, args=(num_ops, 0))
        for _ in range(num_clients)
    ]
    # Start every worker before waiting on any of them.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    assert ops_completed.value == num_ops
    Check(ops_completed.value)
示例#8
0
    def testAck(self):
        """A MEMBER.PUT on port 6370 yields an ack message on the client's channel.

        Subscribes to the `_CLIENT_ID` channel on port 6371, issues one
        MEMBER.PUT on port 6370, and checks that the returned sequence number
        is 0 and that the published message carries the same number.
        """
        common.Start()

        host = "127.0.0.1"
        head = redis.StrictRedis(host, 6370)
        tail = redis.StrictRedis(host, 6371)
        # The ack client needs to be separate, since subscriptions
        # are blocking
        acker = redis.StrictRedis(host, 6371)
        channel = acker.pubsub(ignore_subscribe_messages=True)
        channel.subscribe(_CLIENT_ID)
        time.sleep(0.5)
        # Result discarded; presumably drains the subscribe confirmation.
        channel.get_message()

        seqnum = head.execute_command("MEMBER.PUT", "task_spec",
                                      "some_random_value", _CLIENT_ID)
        time.sleep(0.5)
        ack = channel.get_message()

        assert seqnum == 0
        # Check the sequence number
        assert int(ack["data"]) == seqnum
示例#9
0
def test_etcd_master_online_recovery(startcredis_etcdonly):
    """ Test that SeqPut succeeds when the master is killed and restarted mid-way, then a member is
    killed, then a member is added. The restarted master should be able to recover the chain, with
    the new member being the tail, and no updates should be lost.

    The chain length reported by the master is checked after each step:
    3 after the master restart, 2 after node 1 is killed, and 3 again after
    a node is added.
    """
    common.Start(
        chain=common.MakeChain(3),
        master_mode=MASTER_ETCD,
        heartbeat_interval=1,
        heartbeat_timeout=10)

    # Launch driver process. Note that it will take a minimum of 10 seconds.
    n = 10
    sleep_secs = 1
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()

    time.sleep(0.1)
    common.KillMaster()
    common.StartMaster(master_mode=MASTER_ETCD)
    time.sleep(0.1)
    assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 3

    time.sleep(0.1)
    common.KillNode(index=1, notify=master_client)
    assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 2

    new_node, _ = common.AddNode(master_client, master_mode=MASTER_ETCD)
    try:
        time.sleep(0.1)
        # Wait for the driver to finish all puts before the final checks.
        driver.join()
        assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 3

        assert ops_completed.value == n
        Check(ops_completed.value)
    finally:
        # Cleanup: always stop the added node, even when a check above fails.
        new_node.kill()
示例#10
0
def test_etcd_kill_node_while_master_is_dead(startcredis_etcdonly):
    """ Test that SeqPut succeeds when the master is killed and a node is killed WHILE the master is
    dead. The master is then restarted. No updates should be lost.

    The chain length reported by the restarted master is expected to be 2,
    and 3 once a replacement node has been added.

    TODO: Fails (3/28) because members are not checked for liveness when the master starts up.
    """
    # Choose a long heartbeat timeout so that the master never receives heartbeat expiry notifs.
    common.Start(
        chain=common.MakeChain(3),
        master_mode=MASTER_ETCD,
        heartbeat_interval=1,
        heartbeat_timeout=999)

    # Launch driver process. Note that it will take a minimum of 10 seconds.
    n = 10
    sleep_secs = 1
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()

    time.sleep(0.1)
    common.KillMaster()
    # The node is killed while no master is running and no notify client is passed.
    common.KillNode(index=1)
    common.StartMaster(master_mode=MASTER_ETCD)
    time.sleep(0.2)
    assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 2

    new_node, _ = common.AddNode(master_client, master_mode=MASTER_ETCD)
    try:
        time.sleep(0.1)
        assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 3

        driver.join()

        assert ops_completed.value == n
        Check(ops_completed.value)
    finally:
        # Cleanup: always stop the added node, even when a check above fails.
        new_node.kill()
示例#11
0
    def testBasics(self):
        """Walk one key through put / checkpoint / flush in GCS_CKPTFLUSH mode.

        Writes three successive values to 'k1' and checks the counts returned
        by TAIL.CHECKPOINT and HEAD.FLUSH at each step, as well as what GET
        and READ return before and after the key is flushed.
        """
        common.Start(gcs_mode=common.GCS_CKPTFLUSH)

        def put(value):
            # Write `value` to 'k1' through the head client.
            self.head_client.execute_command(
                'MEMBER.PUT', 'k1', value, _CLIENT_ID)

        def checkpoint():
            # Returns the reply of TAIL.CHECKPOINT (compared to entry counts below).
            return self.ack_client.execute_command('TAIL.CHECKPOINT')

        def flush():
            # Returns the reply of HEAD.FLUSH (compared to flushed-key counts below).
            return self.head_client.execute_command('HEAD.FLUSH')

        def ask(*command):
            # Run an arbitrary command on the ack client.
            return self.ack_client.execute_command(*command)

        put('v1')
        self.assertEqual(b'v1', ask('READ', 'k1'))

        # 1 entry checkpointed.
        self.assertEqual(1, checkpoint())
        # 0 entry checkpointed.
        self.assertEqual(0, checkpoint())

        put('v2')
        self.assertEqual(1, checkpoint())

        put('v3')

        # Process k1 (first seqnum).  Physically, 0 key has been flushed out of
        # _redis_ memory state, because k1 has 2 dirty writes.
        self.assertEqual(0, flush())

        # Process k1 (second seqnum).
        self.assertEqual(0, flush())
        # It remains in memory because of a dirty write (k1, v3).
        self.assertEqual(b'v3', ask('GET k1'))

        # Now all seqnums checkpointed.
        self.assertEqual(1, checkpoint())
        # Process k1 (3rd seqnum).  1 means it's physically flushed.
        self.assertEqual(1, flush())

        # Check that redis's native GET returns nothing.
        self.assertIsNone(ask('GET k1'))
        # READ is credis' read mechanism, can read checkpoints.
        self.assertEqual(b'v3', ask('READ k1'))
示例#12
0
 def testCannotFlush(self):
     """HEAD.FLUSH returns 0 right after starting in GCS_CKPTFLUSH mode."""
     common.Start(gcs_mode=common.GCS_CKPTFLUSH)
     # Nothing has been written or checkpointed before this flush.
     self.assertEqual(0, self.head_client.execute_command('HEAD.FLUSH'))
示例#13
0
 def testCkptFlush(self):
     """TAIL.CHECKPOINT and HEAD.FLUSH both run without raising in GCS_CKPTFLUSH mode."""
     common.Start(gcs_mode=common.GCS_CKPTFLUSH)
     # Replies are ignored; the test passes as long as neither command raises.
     run_on_ack = self.ack_client.execute_command
     run_on_ack('TAIL.CHECKPOINT')
     run_on_head = self.head_client.execute_command
     run_on_head('HEAD.FLUSH')