def test_integrity_on_data_node_failure():
    """Write a file, kill two DataNodes, then read the file from a third.

    The file is written through data-0-node, the DataNode processes on
    data-0-node and data-1-node are killed, and the file is then read
    through data-2-node before the final health check.
    """
    writer_host = "data-0-node.hdfs.mesos"
    reader_host = "data-2-node.hdfs.mesos"

    def file_written():
        return write_data_to_hdfs(writer_host, TEST_FILE_1_NAME)

    def file_readable():
        return read_data_from_hdfs(reader_host, TEST_FILE_1_NAME)

    shakedown.wait_for(file_written, HDFS_CMD_TIMEOUT_SEC)

    # Pause so the write has a chance to succeed and replication can occur
    # before any DataNode is taken down.
    time.sleep(5)

    for doomed_host in ("data-0-node.hdfs.mesos", "data-1-node.hdfs.mesos"):
        kill_task_with_pattern("DataNode", doomed_host)
    time.sleep(1)  # give the DataNode processes a chance to die

    shakedown.wait_for(file_readable, HDFS_CMD_TIMEOUT_SEC)
    check_health()
def test_integrity_on_name_node_failure():
    """Check that data can be written and read after name-0-node fails.

    When HDFS gets installed, the first name node (name-0-node) is the
    active name node by default. This test kills it and then verifies that
    a file can still be written through data-0-node and read back through
    data-2-node.
    """
    writer_host = "data-0-node.hdfs.mesos"
    reader_host = "data-2-node.hdfs.mesos"

    def file_written():
        return write_data_to_hdfs(writer_host, TEST_FILE_2_NAME)

    def file_readable():
        return read_data_from_hdfs(reader_host, TEST_FILE_2_NAME)

    kill_task_with_pattern("NameNode", "name-0-node.hdfs.mesos")
    time.sleep(1)  # give the NameNode a chance to die

    shakedown.wait_for(file_written, HDFS_CMD_TIMEOUT_SEC)
    shakedown.wait_for(file_readable, HDFS_CMD_TIMEOUT_SEC)
    check_health()
def test_kill_namenode_executor():
    """Kill the executor process on name-0-node, then run the health check."""
    executor_pattern = "hdfs.executor.Main"
    target_host = "name-0-node.hdfs.mesos"

    kill_task_with_pattern(executor_pattern, target_host)
    check_health()
def test_kill_all_namenodes():
    """Kill the NameNode process on every service host, then check health.

    The hosts are whatever ``shakedown.get_service_ips(PACKAGE_NAME)``
    returns; the health check runs once, after all kills have been issued.
    """
    service_hosts = shakedown.get_service_ips(PACKAGE_NAME)
    for service_host in service_hosts:
        kill_task_with_pattern("NameNode", service_host)

    check_health()
def test_kill_journalnode_executor():
    """Kill the executor process on journal-0-node, then run the health check."""
    executor_pattern = "hdfs.executor.Main"
    target_host = "journal-0-node.hdfs.mesos"

    kill_task_with_pattern(executor_pattern, target_host)
    check_health()
def test_kill_scheduler():
    """Kill the scheduler process on its host, then run the health check.

    The host is looked up with ``get_scheduler_host()`` immediately before
    the kill is issued.
    """
    scheduler_host = get_scheduler_host()
    kill_task_with_pattern("hdfs.scheduler.Main", scheduler_host)
    check_health()
def test_kill_journal_node():
    """Kill the JournalNode process on journal-0-node, then check health."""
    process_pattern = "JournalNode"
    target_host = "journal-0-node.hdfs.mesos"

    kill_task_with_pattern(process_pattern, target_host)
    check_health()
def test_kill_name_node():
    """Kill the NameNode process on name-0-node, pause, then check health."""
    process_pattern = "NameNode"
    target_host = "name-0-node.hdfs.mesos"

    kill_task_with_pattern(process_pattern, target_host)

    # Give the NameNode a chance to die before the health check runs.
    time.sleep(1)
    check_health()
def test_kill_data_node():
    """Kill the DataNode process on data-0-node, then run the health check."""
    process_pattern = "DataNode"
    target_host = "data-0-node.hdfs.mesos"

    kill_task_with_pattern(process_pattern, target_host)
    check_health()