def test_config_from_second_generation_snapshot(cluster):
    """
    A regression test for #44: confirm that if we load a snapshot on
    startup, do nothing, then re-create a snapshot we don't end up with a
    messed up nodes config.
    """
    cluster.create(3)

    # Bump the log a bit
    for _ in range(20):
        assert cluster.raft_exec('INCR', 'testkey')

    # Compact to get rid of logs
    node3 = cluster.node(3)
    assert node3.client.execute_command('RAFT.DEBUG', 'COMPACT') == b'OK'

    # Restart node, so it loads from the snapshot just created
    node3.restart()
    node3.wait_for_node_voting()

    # Bump the log a bit
    for _ in range(20):
        assert cluster.raft_exec('INCR', 'testkey')

    # Recompact: this is the second-generation snapshot the issue is about
    cluster.wait_for_unanimity()
    assert node3.client.execute_command('RAFT.DEBUG', 'COMPACT') == b'OK'

    # After another restart, the node config must still list all 3 nodes
    node3.restart()
    node3.wait_for_node_voting()
    assert node3.raft_info()['num_nodes'] == 3
def test_removed_node_remains_dead(cluster):
    """
    A removed node stays down and does not resurrect in any case.
    """
    cluster.create(3)

    # Establish some baseline data.
    for _ in range(100):
        cluster.raft_exec('INCR', 'counter')

    # Drop node 3 from the cluster and wait for the config to settle.
    cluster.node(1).client.execute_command('RAFT.NODE', 'REMOVE', '3')
    cluster.node(1).wait_for_num_voting_nodes(2)

    # Push more entries past the removed node.
    for _ in range(100):
        cluster.raft_exec('INCR', 'counter')

    removed = cluster.node(3)

    # The removed node must reject writes...
    with raises(RedisError):
        removed.client.execute_command('RAFT', 'INCR', 'counter')

    # ...and must keep rejecting them even after a restart.
    removed.terminate()
    removed.start()
    with raises(RedisError):
        removed.client.execute_command('RAFT', 'INCR', 'counter')
def test_fuzzing_with_restarts_and_rewrites(cluster):
    """
    Counter fuzzer with log rewrites.
    """
    node_count = 3
    cycles = 100
    cluster.create(node_count)

    # Randomize max log entries so rewrites kick in at different points.
    for raft_node in cluster.nodes.values():
        raft_node.client.execute_command(
            'RAFT.CONFIG', 'SET', 'raft-log-max-file-size',
            str(random.randint(1000, 2000)))

    for cycle in range(cycles):
        assert cluster.raft_exec('INCRBY', 'counter', 1) == cycle + 1
        logging.info('---------- Executed INCRBY # %s', cycle)
        # Roughly one in seven cycles, bounce a random node.
        if random.randint(1, 7) == 1:
            victim = random.randint(1, node_count)
            logging.info('********** Restarting node %s **********', victim)
            cluster.node(victim).restart()
            cluster.node(victim).wait_for_election()
            logging.info('********** Node %s is UP **********', victim)

    # No increments may be lost despite the restarts and rewrites.
    assert int(cluster.raft_exec('GET', 'counter')) == cycles
def test_reelection_basic_flow(cluster):
    """
    Basic reelection flow
    """
    cluster.create(3)
    assert cluster.leader == 1
    assert cluster.raft_exec('SET', 'key', 'value') == b'OK'

    # Kill the current leader; a new one should be elected.
    old_leader = cluster.node(1)
    old_leader.terminate()
    cluster.node(2).wait_for_election()

    # The cluster accepts writes under the new leader, and surviving
    # nodes can serve the new key.
    assert cluster.raft_exec('SET', 'key2', 'value2') == b'OK'
    cluster.exec_all('GET', 'key2')
def test_proxying(cluster):
    """
    Command proxying from follower to leader works
    """
    cluster.create(3)
    assert cluster.leader == 1

    # Without proxying enabled, a follower redirects writes with -MOVED
    with raises(ResponseError, match='MOVED'):
        assert cluster.node(2).raft_exec('SET', 'key', 'value') == b'OK'
    assert cluster.node(2).client.execute_command(
        'RAFT.CONFIG', 'SET', 'follower-proxy', 'yes') == b'OK'

    # Basic sanity
    assert cluster.node(2).raft_exec('SET', 'key', 'value') == b'OK'
    assert cluster.raft_exec('GET', 'key') == b'value'

    # Numeric values
    assert cluster.node(2).raft_exec('SADD', 'myset', 'a') == 1
    assert cluster.node(2).raft_exec('SADD', 'myset', 'b') == 1

    # Multibulk
    assert set(cluster.node(2).raft_exec('SMEMBERS', 'myset')) == set(
        [b'a', b'b'])

    # Nested multibulk
    assert set(
        cluster.node(2).raft_exec(
            'EVAL', 'return {{\'a\',\'b\',\'c\'}};', 0)[0]) == set(
        [b'a', b'b', b'c'])

    # Error
    with raises(ResponseError, match='WRONGTYPE'):
        cluster.node(2).raft_exec('INCR', 'myset')
def test_stale_reads_on_leader_election(cluster):
    """
    A value written before the leader goes down must be readable after the
    leader restarts and the cluster recovers; reads must not return stale
    data across the failover.
    """
    cluster.create(3)

    # Try 10 times
    for _ in range(10):
        val_written = cluster.raft_exec("INCR", "counter-1")

        # Bounce the current leader without waiting for verification
        leader = cluster.node(cluster.leader)
        leader.terminate()
        leader.start(verify=False)

        val_read = cluster.raft_exec('GET', 'counter-1')
        assert val_read is not None
        assert val_written == int(val_read)
        time.sleep(1)
def test_log_rollback(cluster):
    """
    Rollback of log entries that were written in the minority.
    """
    cluster.create(3)
    assert cluster.leader == 1
    assert cluster.raft_exec('INCRBY', 'key', '111') == 111

    # Break cluster: leave node 1 alone without a majority
    cluster.node(2).terminate()
    cluster.node(3).terminate()

    # Load a command which can't be committed
    assert cluster.node(1).current_index() == 6
    conn = cluster.node(1).client.connection_pool.get_connection('RAFT')
    conn.send_command('RAFT', 'INCRBY', 'key', '222')
    assert cluster.node(1).current_index() == 7
    cluster.node(1).terminate()

    # We want to be sure the last entry is in the log
    log = RaftLog(cluster.node(1).raftlog)
    log.read()
    assert log.entry_count() == 7

    # Restart the cluster without node 1, make sure the write was
    # not committed.
    cluster.node(2).start()
    cluster.node(3).start()
    cluster.node(2).wait_for_election()
    assert cluster.node(2).current_index() == 7  # 6 + 1 no-op entry

    # Restart node 1
    cluster.node(1).start()
    cluster.node(1).wait_for_election()

    # Make another write and make sure it overwrites the previous one in
    # node 1's log
    assert cluster.raft_exec('INCRBY', 'key', '333') == 444
    cluster.wait_for_unanimity()

    # Make sure log reflects the change: last entry is the new write,
    # the uncommitted minority entry was rolled back
    log.reset()
    log.read()
    assert match(r'.*INCRBY.*333', str(log.entries[-1].data()))
def test_node_history_with_same_address(cluster):
    """
    Repeatedly removing and re-adding nodes that reuse the same ports must
    not corrupt the cluster configuration, even across a full cluster
    restart.
    """
    cluster.create(5)
    cluster.raft_exec("INCR", "step-counter")

    # Remove nodes, remembering their ports for later reuse
    ports = []
    for node_id in [2, 3, 4, 5]:
        ports.append(cluster.node(node_id).port)
        cluster.remove_node(node_id)
        cluster.leader_node().wait_for_log_applied()
    cluster.node(cluster.leader).wait_for_num_nodes(1)

    # Now add and remove several more times
    for _ in range(5):
        for port in ports:
            n = cluster.add_node(port=port)
            cluster.leader_node().wait_for_num_nodes(2)
            cluster.leader_node().wait_for_log_applied()
            cluster.remove_node(n.id)
            cluster.leader_node().wait_for_num_nodes(1)
            cluster.leader_node().wait_for_log_applied()

    # Add enough data in the log to satisfy timing
    for _ in range(3000):
        cluster.raft_exec("INCR", "step-counter")

    # Add another node, reusing one of the recycled ports
    new_node = cluster.add_node(port=ports[0])
    new_node.wait_for_node_voting()

    # Terminate all
    cluster.terminate()

    # Start new node
    cluster.start()

    # need some time to start applying logs..
    time.sleep(2)
    assert cluster.raft_exec("GET", "step-counter") == b'3001'
def test_fuzzing_with_config_changes(cluster):
    """
    Basic Raft fuzzer test
    """
    nodes = 5
    cycles = 100
    cluster.create(nodes)
    for i in range(cycles):
        assert cluster.raft_exec('INCRBY', 'counter', 1) == i + 1
        # Roughly one in seven cycles: remove a random node and add a
        # fresh one in its place
        if random.randint(1, 7) == 1:
            try:
                node_id = cluster.random_node_id()
                cluster.remove_node(node_id)
            except ResponseError:
                # Removal can fail (e.g. config change in progress);
                # skip the membership change for this cycle
                continue
            cluster.add_node().wait_for_node_voting()

    # No increments may be lost despite the membership churn
    assert int(cluster.raft_exec('GET', 'counter')) == cycles
def test_fuzzing_with_restarts(cluster):
    """
    Basic Raft fuzzer test
    """
    node_count = 3
    cycles = 100
    cluster.create(node_count)

    for cycle in range(cycles):
        assert cluster.raft_exec('INCRBY', 'counter', 1) == cycle + 1
        logging.info('---------- Executed INCRBY # %s', cycle)
        # Every seventh cycle, bounce a randomly chosen node.
        if cycle % 7 == 0:
            target = random.randint(1, node_count)
            logging.info('********** Restarting node %s **********', target)
            cluster.node(target).restart()
            cluster.node(target).wait_for_election()
            logging.info('********** Node %s is UP **********', target)

    # No increments may be lost despite the restarts.
    assert int(cluster.raft_exec('GET', 'counter')) == cycles
def test_stale_log_trim(cluster):
    """
    When starting up, if log is older than snapshot it should be trimmed.
    """
    cluster.create(3, prepopulate_log=20)

    # Stop node 3 and advance the log; then overwrite node3
    # with a recent snapshot and start it. This simulates delivery of snapshot
    # and a crash sometime before the log is adjusted.
    cluster.node(3).terminate()
    for _ in range(20):
        assert cluster.raft_exec('INCR', 'testkey')
    assert cluster.node(1).client.execute_command(
        'RAFT.DEBUG', 'COMPACT') == b'OK'
    for _ in range(20):
        assert cluster.raft_exec('INCR', 'testkey')
    # Copy node 1's snapshot file over node 3's, leaving node 3's stale log
    shutil.copyfile(os.path.join(os.curdir, cluster.node(1).dbfilename),
                    os.path.join(os.curdir, cluster.node(3).dbfilename))
    cluster.node(3).start()
    cluster.node(3).wait_for_node_voting()

    # Cluster still accepts writes after node 3 rejoined
    assert cluster.raft_exec('INCR', 'last-key')
def test_log_reset_on_snapshot_load(cluster):
    """
    Test correct reset of log when a snapshot is received.
    """
    cluster.create(3, prepopulate_log=20)

    # Take node 3 down, advance the log on the majority and compact, so
    # the follower can only catch up by receiving a snapshot.
    follower = cluster.node(3)
    follower.terminate()
    for _ in range(20):
        assert cluster.raft_exec('INCR', 'testkey')
    assert cluster.node(1).client.execute_command(
        'RAFT.DEBUG', 'COMPACT') == b'OK'

    # Bring node 3 back and wait for it to receive the snapshot.
    follower.start()
    follower.wait_for_node_voting()

    # A clean restart afterwards proves the local log was reset correctly.
    follower.terminate()
    follower.start()
    follower.wait_for_node_voting()

    assert cluster.raft_exec('INCR', 'last-key')
    cluster.wait_for_unanimity()
def test_new_uncommitted_during_rewrite(cluster): cluster.create(3) # Take down majority to create uncommitted entries and check rewrite cluster.node(1).raft_exec('SET', 'key', '1') # Initiate compaction and wait to see it's in progress conn = cluster.node(1).client.connection_pool.get_connection('COMPACT') conn.send_command('RAFT.DEBUG', 'COMPACT', '2') cluster.node(1).wait_for_info_param('snapshot_in_progress', 'yes') assert cluster.node(1).raft_info()['snapshot_in_progress'] == 'yes' # Send a bunch of writes cluster.node(1).raft_exec('INCRBY', 'key', '2') cluster.node(1).raft_exec('INCRBY', 'key', '3') cluster.node(1).raft_exec('INCRBY', 'key', '4') # Wait for compaction to end assert cluster.node(1).raft_info()['snapshot_in_progress'] == 'yes' cluster.node(1).wait_for_info_param('snapshot_in_progress', 'no') # Make sure our writes made it to the log log = RaftLog(cluster.node(1).raftlog) log.read() assert log.entry_count(LogEntry.LogType.NORMAL) == 3 # Extra check -- Make sure we can read it back. Note that we need to start # all nodes because we don't log the commit index. cluster.node(1).terminate() cluster.node(2).terminate() cluster.node(3).terminate() cluster.node(1).start() cluster.node(2).start() cluster.node(3).start() cluster.node(1).wait_for_info_param('state', 'up') # Make sure cluster state is as expected assert cluster.raft_exec('get', 'key') == b'10' # Make sure node 1 state is as expected cluster.node(1).wait_for_log_applied() assert cluster.node(1).client.get('key') == b'10'
def test_read_before_commits(cluster):
    """
    Reading a key that was never written returns an empty reply rather
    than an error, even before any command has been committed.
    """
    cluster.create(3)
    result = cluster.raft_exec('GET', 'somekey')
    assert result is None