def test_mark_nodes_failed_after_missing_timeout_heartbeat(self):
    # A script set whose last ping is 11 minutes old must move the node
    # to self.failed_status, log the reason, and mark every pending
    # script result as timed out.
    node, script_set = self.make_node()
    script_set.last_ping = datetime.now() - timedelta(minutes=11)
    script_set.save()
    script_results = [
        factory.make_ScriptResult(
            script_set=script_set, status=SCRIPT_STATUS.PENDING)
        for _ in range(3)
    ]

    mark_nodes_failed_after_missing_script_timeout()
    node = reload_object(node)

    self.assertEqual(self.failed_status, node.status)
    self.assertEqual(
        "Node has not been heard from for the last 10 minutes",
        node.error_description)
    self.assertIn(
        call(
            "%s: Has not been heard from for the last 10 minutes" %
            node.hostname),
        self.maaslog.call_args_list)
    if node.enable_ssh:
        # With SSH enabled the node is expected to stay powered on.
        self.assertThat(self.mock_stop, MockNotCalled())
    else:
        self.assertThat(self.mock_stop, MockCalledOnce())
        self.assertIn(
            call("%s: Stopped because SSH is disabled" % node.hostname),
            self.maaslog.call_args_list)
    for script_result in script_results:
        self.assertEqual(
            SCRIPT_STATUS.TIMEDOUT, reload_object(script_result).status)
def test_uses_param_runtime(self):
    # The script's own timeout is two minutes, but the running result
    # carries a 'runtime' parameter of 60 * 60 (presumably seconds --
    # TODO confirm). Although it started 50 minutes ago, the node and
    # every script result must be left untouched.
    node, script_set = self.make_node()
    # Named current_time so it cannot be confused with the now() helper
    # that sibling tests call.
    current_time = datetime.now()
    script_set.last_ping = current_time
    script_set.save()
    passed_script_result = factory.make_ScriptResult(
        script_set=script_set, status=SCRIPT_STATUS.PASSED)
    failed_script_result = factory.make_ScriptResult(
        script_set=script_set, status=SCRIPT_STATUS.FAILED)
    pending_script_result = factory.make_ScriptResult(
        script_set=script_set, status=SCRIPT_STATUS.PENDING)
    script = factory.make_Script(timeout=timedelta(minutes=2))
    running_script_result = factory.make_ScriptResult(
        script_set=script_set, status=SCRIPT_STATUS.RUNNING, script=script,
        started=current_time - timedelta(minutes=50),
        parameters={'runtime': {
            'type': 'runtime',
            'value': 60 * 60,
        }})

    mark_nodes_failed_after_missing_script_timeout()
    node = reload_object(node)

    self.assertEqual(self.status, node.status)
    self.assertThat(self.mock_stop, MockNotCalled())
    self.assertEqual(
        SCRIPT_STATUS.PASSED, reload_object(passed_script_result).status)
    self.assertEqual(
        SCRIPT_STATUS.FAILED, reload_object(failed_script_result).status)
    self.assertEqual(
        SCRIPT_STATUS.PENDING,
        reload_object(pending_script_result).status)
    self.assertEqual(
        SCRIPT_STATUS.RUNNING,
        reload_object(running_script_result).status)
def test_mark_nodes_failed_after_builtin_commiss_script_overrun(self):
    # A builtin commissioning script that started 10 minutes ago is
    # expected to have overrun the timeout listed for it in
    # NODE_INFO_SCRIPTS: the node must fail commissioning, the running
    # result must time out and the still-pending results must be aborted.
    user = factory.make_admin()
    node = factory.make_Node(status=NODE_STATUS.COMMISSIONING, owner=user)
    script_set = ScriptSet.objects.create_commissioning_script_set(node)
    node.current_commissioning_script_set = script_set
    node.save()
    current_time = now()
    script_set.last_ping = current_time
    script_set.save()

    # Take three results off the end of the set and give them terminal or
    # running states; whatever remains in the list stays pending.
    pending_script_results = list(script_set.scriptresult_set.all())
    passed_script_result = pending_script_results.pop()
    passed_script_result.status = SCRIPT_STATUS.PASSED
    passed_script_result.save()
    failed_script_result = pending_script_results.pop()
    failed_script_result.status = SCRIPT_STATUS.FAILED
    failed_script_result.save()
    running_script_result = pending_script_results.pop()
    running_script_result.status = SCRIPT_STATUS.RUNNING
    running_script_result.started = current_time - timedelta(minutes=10)
    running_script_result.save()

    mark_nodes_failed_after_missing_script_timeout(current_time, 20)
    node = reload_object(node)

    # The same timeout text appears in the error description and the log.
    timeout_text = str(
        NODE_INFO_SCRIPTS[running_script_result.name]["timeout"])
    self.assertEqual(NODE_STATUS.FAILED_COMMISSIONING, node.status)
    self.assertEqual(
        "%s has run past it's timeout(%s)"
        % (running_script_result.name, timeout_text),
        node.error_description,
    )
    self.assertIn(
        call(
            "%s: %s has run past it's timeout(%s)"
            % (node.hostname, running_script_result.name, timeout_text)
        ),
        self.maaslog.call_args_list,
    )
    if node.enable_ssh:
        self.assertThat(self.mock_stop, MockNotCalled())
    else:
        self.assertThat(self.mock_stop, MockCalledOnce())
        self.assertIn(
            call("%s: Stopped because SSH is disabled" % node.hostname),
            self.maaslog.call_args_list,
        )
    self.assertEqual(
        SCRIPT_STATUS.PASSED, reload_object(passed_script_result).status
    )
    self.assertEqual(
        SCRIPT_STATUS.FAILED, reload_object(failed_script_result).status
    )
    self.assertEqual(
        SCRIPT_STATUS.TIMEDOUT, reload_object(running_script_result).status
    )
    for script_result in pending_script_results:
        self.assertEqual(
            SCRIPT_STATUS.ABORTED, reload_object(script_result).status
        )
def test_mark_nodes_failed_after_script_overrun(self):
    # A script with a 60 second timeout that started 10 minutes ago has
    # overrun: the node must move to self.failed_status, the running
    # result must time out and the pending result must be aborted, while
    # already finished results keep their status.
    node, script_set = self.make_node()
    current_time = now()
    script_set.last_ping = current_time
    script_set.save()
    passed_script_result = factory.make_ScriptResult(
        script_set=script_set, status=SCRIPT_STATUS.PASSED
    )
    failed_script_result = factory.make_ScriptResult(
        script_set=script_set, status=SCRIPT_STATUS.FAILED
    )
    pending_script_result = factory.make_ScriptResult(
        script_set=script_set, status=SCRIPT_STATUS.PENDING
    )
    script = factory.make_Script(timeout=timedelta(seconds=60))
    running_script_result = factory.make_ScriptResult(
        script_set=script_set,
        status=SCRIPT_STATUS.RUNNING,
        script=script,
        started=current_time - timedelta(minutes=10),
    )

    mark_nodes_failed_after_missing_script_timeout(current_time, 20)
    node = reload_object(node)

    # The same timeout text appears in the error description and the log.
    timeout_text = str(running_script_result.script.timeout)
    self.assertEqual(self.failed_status, node.status)
    self.assertEqual(
        "%s has run past it's timeout(%s)"
        % (running_script_result.name, timeout_text),
        node.error_description,
    )
    self.assertIn(
        call(
            "%s: %s has run past it's timeout(%s)"
            % (node.hostname, running_script_result.name, timeout_text)
        ),
        self.maaslog.call_args_list,
    )
    if node.enable_ssh:
        self.assertThat(self.mock_stop, MockNotCalled())
    else:
        self.assertThat(self.mock_stop, MockCalledOnce())
        self.assertIn(
            call("%s: Stopped because SSH is disabled" % node.hostname),
            self.maaslog.call_args_list,
        )
    self.assertEqual(
        SCRIPT_STATUS.PASSED, reload_object(passed_script_result).status
    )
    self.assertEqual(
        SCRIPT_STATUS.FAILED, reload_object(failed_script_result).status
    )
    self.assertEqual(
        SCRIPT_STATUS.ABORTED, reload_object(pending_script_result).status
    )
    self.assertEqual(
        SCRIPT_STATUS.TIMEDOUT, reload_object(running_script_result).status
    )
def test_mark_nodes_handled_last_ping_None(self):
    # A script set that has never pinged (last_ping is None) must not
    # break the monitor, and the node must keep its current status.
    node, script_set = self.make_node()
    script_set.last_ping = None
    script_set.save()
    for _ in range(3):
        factory.make_ScriptResult(
            script_set=script_set, status=SCRIPT_STATUS.PENDING
        )

    # No exception should be raised.
    mark_nodes_failed_after_missing_script_timeout()

    node = reload_object(node)
    self.assertEqual(self.status, node.status)
def test_mark_nodes_failed_after_missing_timeout_prefetches(self):
    # The monitor's query count must not grow with the number of nodes:
    # it is measured once with a single node and again after six more
    # have been added.
    self.patch(Node, "mark_failed")
    current_time = now()

    def make_overrun_node():
        # Create a node whose 60 second script started 3 minutes ago.
        _, script_set = self.make_node()
        script_set.last_ping = current_time
        script_set.save()
        script = factory.make_Script(timeout=timedelta(seconds=60))
        factory.make_ScriptResult(
            script_set=script_set,
            status=SCRIPT_STATUS.RUNNING,
            script=script,
            started=current_time - timedelta(minutes=3),
        )

    make_overrun_node()
    counter_one = CountQueries()
    with counter_one:
        mark_nodes_failed_after_missing_script_timeout(current_time, 20)

    for _ in range(6):
        make_overrun_node()
    counter_many = CountQueries()
    with counter_many:
        mark_nodes_failed_after_missing_script_timeout(current_time, 20)

    # Lookup takes 7 queries no matter the amount of Nodes
    # 1. Get all Nodes in commissioning or testing
    # 2. Get all commissioning ScriptSets
    # 3. Get all testing ScriptSets
    # 4. Get all commissioning ScriptResults
    # 5. Get all testing ScriptResults
    # 6. Get all commissioning Scripts
    # 7. Get all testing Scripts
    self.assertEqual(7, counter_one.num_queries)
    self.assertEqual(7, counter_many.num_queries)
def test_sets_status_expires_when_flatlined_with_may_reboot_script(self):
    # With a running may_reboot script and a stale last_ping, the node's
    # status_expires must equal last_ping plus the monitored-status
    # timeout for self.status (the expected expression reduces to that).
    node, script_set = self.make_node()
    # Named current_time so it cannot be confused with the now() helper
    # that sibling tests call.
    current_time = datetime.now()
    if self.status == NODE_STATUS.COMMISSIONING:
        script_type = SCRIPT_TYPE.COMMISSIONING
    else:
        script_type = SCRIPT_TYPE.TESTING
    script = factory.make_Script(script_type=script_type, may_reboot=True)
    factory.make_ScriptResult(
        script=script, script_set=script_set, status=SCRIPT_STATUS.RUNNING)
    # NOTE(review): timedelta(11) is 11 *days* (the first positional
    # argument is days) -- confirm that minutes=11 was not intended.
    script_set.last_ping = current_time - timedelta(11)
    script_set.save()

    mark_nodes_failed_after_missing_script_timeout()
    node = reload_object(node)

    self.assertEqual(
        current_time - (current_time - script_set.last_ping) + timedelta(
            minutes=NODE_FAILURE_MONITORED_STATUS_TIMEOUTS[self.status]),
        node.status_expires)
def test_sets_status_expires_when_flatlined_with_may_reboot_script(self):
    # With a running may_reboot script and a stale last_ping, the node's
    # status_expires must equal last_ping plus the timeout returned by
    # get_node_timeout (the expected expression reduces to that).
    node, script_set = self.make_node()
    current_time = now()
    if self.status == NODE_STATUS.COMMISSIONING:
        script_type = SCRIPT_TYPE.COMMISSIONING
    else:
        script_type = SCRIPT_TYPE.TESTING
    script = factory.make_Script(script_type=script_type, may_reboot=True)
    factory.make_ScriptResult(
        script=script, script_set=script_set, status=SCRIPT_STATUS.RUNNING
    )
    # NOTE(review): timedelta(11) is 11 *days* (the first positional
    # argument is days) -- confirm that minutes=11 was not intended.
    script_set.last_ping = current_time - timedelta(11)
    script_set.save()

    mark_nodes_failed_after_missing_script_timeout(current_time, 20)
    node = reload_object(node)

    self.assertEqual(
        current_time
        - (current_time - script_set.last_ping)
        + timedelta(minutes=get_node_timeout(self.status, 20)),
        node.status_expires,
    )
def test_mark_nodes_failed_after_missing_timeout_prefetches(self):
    # Counts the queries issued by the monitor for three nodes that each
    # have a 60 second script which started 3 minutes ago.
    self.patch(Node, 'mark_failed')
    # Named current_time so it cannot be confused with the now() helper
    # that sibling tests call.
    current_time = datetime.now()
    nodes = []
    for _ in range(3):
        node, script_set = self.make_node()
        script_set.last_ping = current_time
        script_set.save()
        script = factory.make_Script(timeout=timedelta(seconds=60))
        factory.make_ScriptResult(
            script_set=script_set, status=SCRIPT_STATUS.RUNNING,
            script=script, started=current_time - timedelta(minutes=3))
        nodes.append(node)

    counter = CountQueries()
    with counter:
        mark_nodes_failed_after_missing_script_timeout()

    # Initial lookup and prefetch take three queries. This is done once to
    # find which nodes are being tested and, on each node, which scripts
    # are currently running.
    # NOTE(review): the additional two queries per node are not explained
    # here; presumably they come from handling each overrun node --
    # confirm against the implementation.
    self.assertEqual(3 + len(nodes) * 2, counter.num_queries)
def test_uses_param_runtime(self):
    # The script's own timeout is two minutes, but the running result
    # carries a "runtime" parameter of 60 * 60 (presumably seconds --
    # TODO confirm). Although it started 50 minutes ago, the node and
    # every script result must be left untouched.
    node, script_set = self.make_node()
    current_time = now()
    script_set.last_ping = current_time
    script_set.save()
    passed_script_result = factory.make_ScriptResult(
        script_set=script_set, status=SCRIPT_STATUS.PASSED
    )
    failed_script_result = factory.make_ScriptResult(
        script_set=script_set, status=SCRIPT_STATUS.FAILED
    )
    pending_script_result = factory.make_ScriptResult(
        script_set=script_set, status=SCRIPT_STATUS.PENDING
    )
    script = factory.make_Script(timeout=timedelta(minutes=2))
    running_script_result = factory.make_ScriptResult(
        script_set=script_set,
        status=SCRIPT_STATUS.RUNNING,
        script=script,
        started=current_time - timedelta(minutes=50),
        parameters={"runtime": {"type": "runtime", "value": 60 * 60}},
    )

    mark_nodes_failed_after_missing_script_timeout(current_time, 20)
    node = reload_object(node)

    self.assertEqual(self.status, node.status)
    self.assertThat(self.mock_stop, MockNotCalled())
    self.assertEqual(
        SCRIPT_STATUS.PASSED, reload_object(passed_script_result).status
    )
    self.assertEqual(
        SCRIPT_STATUS.FAILED, reload_object(failed_script_result).status
    )
    self.assertEqual(
        SCRIPT_STATUS.PENDING, reload_object(pending_script_result).status
    )
    self.assertEqual(
        SCRIPT_STATUS.RUNNING, reload_object(running_script_result).status
    )
def test_mark_nodes_failed_after_script_overrun(self):
    # A script with a 60 second timeout that started 10 minutes ago has
    # overrun: the node must move to self.failed_status, the running
    # result must time out and the pending result must be aborted, while
    # already finished results keep their status.
    node, script_set = self.make_node()
    # Named current_time so it cannot be confused with the now() helper
    # that sibling tests call.
    current_time = datetime.now()
    script_set.last_ping = current_time
    script_set.save()
    passed_script_result = factory.make_ScriptResult(
        script_set=script_set, status=SCRIPT_STATUS.PASSED)
    failed_script_result = factory.make_ScriptResult(
        script_set=script_set, status=SCRIPT_STATUS.FAILED)
    pending_script_result = factory.make_ScriptResult(
        script_set=script_set, status=SCRIPT_STATUS.PENDING)
    script = factory.make_Script(timeout=timedelta(seconds=60))
    running_script_result = factory.make_ScriptResult(
        script_set=script_set, status=SCRIPT_STATUS.RUNNING, script=script,
        started=current_time - timedelta(minutes=10))

    mark_nodes_failed_after_missing_script_timeout()
    node = reload_object(node)

    self.assertEqual(self.failed_status, node.status)
    self.assertEqual(
        "%s has run past it's timeout(%s)" % (
            running_script_result.name,
            str(running_script_result.script.timeout)),
        node.error_description)
    if node.enable_ssh:
        self.assertThat(self.mock_stop, MockNotCalled())
    else:
        self.assertThat(self.mock_stop, MockCalledOnce())
    self.assertEqual(
        SCRIPT_STATUS.PASSED, reload_object(passed_script_result).status)
    self.assertEqual(
        SCRIPT_STATUS.FAILED, reload_object(failed_script_result).status)
    self.assertEqual(
        SCRIPT_STATUS.ABORTED,
        reload_object(pending_script_result).status)
    self.assertEqual(
        SCRIPT_STATUS.TIMEDOUT,
        reload_object(running_script_result).status)
def test_mark_nodes_failed_after_missing_timeout_heartbeat(self):
    # A script set whose last ping is 11 minutes old must move the node
    # to self.failed_status and mark every pending script result as
    # timed out.
    node, script_set = self.make_node()
    script_set.last_ping = datetime.now() - timedelta(minutes=11)
    script_set.save()
    script_results = [
        factory.make_ScriptResult(
            script_set=script_set, status=SCRIPT_STATUS.PENDING)
        for _ in range(3)
    ]

    mark_nodes_failed_after_missing_script_timeout()
    node = reload_object(node)

    self.assertEqual(self.failed_status, node.status)
    self.assertEqual(
        'Node has missed the last 5 heartbeats', node.error_description)
    if node.enable_ssh:
        # With SSH enabled the node is expected to stay powered on.
        self.assertThat(self.mock_stop, MockNotCalled())
    else:
        self.assertThat(self.mock_stop, MockCalledOnce())
    for script_result in script_results:
        self.assertEqual(
            SCRIPT_STATUS.TIMEDOUT, reload_object(script_result).status)