def setUp(self):
  """Builds the handler test app, one request with one command, and mocks.

  The configured plugin and common.Now are patched; Now is pinned to
  TIMESTAMP.
  """
  super(CommandEventHandlerTest, self).setUp()
  self.testapp = webtest.TestApp(command_event_handler.APP)

  # Swap the configured plugin for a mock.
  self.plugin_patcher = mock.patch("__main__.env_config.CONFIG.plugin")
  self.plugin_patcher.start()

  request_info = datastore_entities.CommandInfo(
      command_line="command_line",
      cluster="cluster",
      run_target="run_target")
  self.request = request_manager.CreateRequest(
      request_id="1001",
      user="******",
      command_infos=[request_info])

  command_info = datastore_entities.CommandInfo(
      command_line="long command line",
      cluster="foobar",
      run_target="foo",
      run_count=1,
      shard_count=1)
  plugin_data = {
      "ants_invocation_id": "i123",
      "ants_work_unit_id": "w123"
  }
  created_commands = command_manager.CreateCommands(
      request_id=self.request.key.id(),
      command_infos=[command_info],
      shard_indexes=list(range(1)),
      request_plugin_data=plugin_data)
  self.command = created_commands[0]

  # Pin common.Now so code under test sees a fixed TIMESTAMP.
  self.now_patcher = mock.patch.object(common, "Now")
  self.mock_now = self.now_patcher.start()
  self.mock_now.return_value = TIMESTAMP
def setUp(self):
  """Builds the monitor test app, one request with one command, and mocks.

  The configured plugin is patched and the ndb context cache is cleared once
  the fixtures have been stored.
  """
  super(CommandAttemptMonitorTest, self).setUp()
  self.testapp = webtest.TestApp(command_attempt_monitor.APP)

  # Swap the configured plugin for a mock.
  self.plugin_patcher = mock.patch('__main__.env_config.CONFIG.plugin')
  self.plugin_patcher.start()

  request_info = datastore_entities.CommandInfo(
      command_line='command_line',
      cluster='cluster',
      run_target='run_target')
  self.request = request_manager.CreateRequest(
      request_id='1001',
      user='******',
      command_infos=[request_info])

  command_info = datastore_entities.CommandInfo(
      command_line='long command line',
      cluster='foobar',
      run_target='foo',
      run_count=1,
      shard_count=1)
  plugin_data = {
      'ants_invocation_id': 'i123',
      'ants_work_unit_id': 'w123'
  }
  created_commands = command_manager.CreateCommands(
      request_id=self.request.key.id(),
      command_infos=[command_info],
      shard_indexes=list(range(1)),
      request_plugin_data=plugin_data)
  self.command = created_commands[0]

  # Clear Datastore cache
  ndb.get_context().clear_cache()
def testMonitor(self, sync):
  """Monitor() schedules one sync task per command and each task syncs it.

  Args:
    sync: mock expected to be called once per (request id, command id) pair.
  """
  request_id = self.request_2.key.id()
  command_infos = [
      datastore_entities.CommandInfo(
          command_line=command_line,
          shard_count=2,
          run_target='foo',
          run_count=1,
          cluster='foobar')
      for command_line in ('long command line', 'longer_command_line')
  ]
  commands = command_manager.CreateCommands(
      request_id=request_id,
      command_infos=command_infos,
      shard_indexes=list(range(2)))

  num_monitored = command_monitor.Monitor(commands)
  self.assertEqual(2, num_monitored)

  tasks = self.mock_task_scheduler.GetTasks()
  self.assertEqual(2, len(tasks))

  # Deliver each scheduled task to the sync queue handler.
  queue_url = '/_ah/queue/%s' % command_monitor.COMMAND_SYNC_QUEUE
  for task in tasks:
    response = self.testapp.post(queue_url, task.payload)
    self.assertEqual('200 OK', response.status)

  first_command, second_command = commands[0], commands[1]
  expected_calls = [
      mock.call(request_id, first_command.key.id()),
      mock.call(request_id, second_command.key.id()),
  ]
  sync.assert_has_calls(expected_calls)
def testAddToSyncQueue_CustomCancelDeadline(self, mock_add):
  """A command one minute short of its custom timeout is synced in a minute.

  Args:
    mock_add: mock expected to receive the sync-queue task.
  """
  # Create a command with a custom 10 hour command timeout that needs to be
  # cancelled in 1 minute.
  datastore_entities.Command.update_time._auto_now = False
  now = datetime.datetime.utcnow()
  custom_timeout = 10 * 3600
  command_info = datastore_entities.CommandInfo(
      command_line='command line',
      run_target='run_target',
      run_count=1,
      shard_count=1,
      cluster='cluster')
  created_commands = command_manager.CreateCommands(
      request_id=self.request.key.id(),
      command_infos=[command_info],
      shard_indexes=list(range(1)),
      queue_timeout_seconds=custom_timeout)
  command = created_commands[0]
  _, request_id, _, command_id = command.key.flat()

  # Backdate the last update so that only 60 seconds of the timeout remain.
  elapsed = datetime.timedelta(seconds=custom_timeout - 60)
  command.state = common.CommandState.QUEUED
  command.update_time = now - elapsed
  command.put()

  command_monitor.AddToSyncQueue(command)

  # Command monitor should schedule it to be synced in 1 minute.
  expected_payload = json.dumps({
      command_manager.COMMAND_ID_KEY: command_id,
      command_manager.REQUEST_ID_KEY: request_id,
  })
  expected_eta = now + datetime.timedelta(minutes=1)
  mock_add.assert_called_once_with(
      queue_name=command_monitor.COMMAND_SYNC_QUEUE,
      payload=expected_payload,
      eta=expected_eta)
def testAddToSyncQueue_RunningCommand(self, mock_add, mock_now):
  """A long-running command is re-synced after the max event delay.

  Args:
    mock_add: mock expected to receive the sync-queue task.
    mock_now: mock clock; its return value is pinned to the test's "now".
  """
  # Create a command that has been running for 3 hours.
  datastore_entities.Command.update_time._auto_now = False
  now = datetime.datetime.utcnow()
  mock_now.return_value = now
  command_info = datastore_entities.CommandInfo(
      command_line='command line',
      run_target='run_target',
      run_count=1,
      shard_count=1,
      cluster='cluster')
  created_commands = command_manager.CreateCommands(
      request_id=self.request.key.id(),
      command_infos=[command_info],
      shard_indexes=list(range(1)))
  command = created_commands[0]
  _, request_id, _, command_id = command.key.flat()

  command.state = common.CommandState.RUNNING
  command.update_time = now - datetime.timedelta(hours=3)
  command.put()

  command_monitor.AddToSyncQueue(command)

  # Command monitor should schedule it to be synced in
  # MAX_COMMAND_EVENT_DELAY_MINs.
  expected_payload = json.dumps({
      command_manager.COMMAND_ID_KEY: command_id,
      command_manager.REQUEST_ID_KEY: request_id,
  })
  expected_delay = datetime.timedelta(
      minutes=command_monitor.MAX_COMMAND_EVENT_DELAY_MIN)
  mock_add.assert_called_once_with(
      queue_name=command_monitor.COMMAND_SYNC_QUEUE,
      payload=expected_payload,
      eta=now + expected_delay)
def testSyncCommand(self, mock_ensure, sync):
  """A QUEUED command idle for twice the inactive limit gets canceled.

  Args:
    mock_ensure: mock expected to be called once with the command.
    sync: mock expected not to be called.
  """
  datastore_entities.Command.update_time._auto_now = False
  now = datetime.datetime.utcnow()
  command_info = datastore_entities.CommandInfo(
      command_line='long command line',
      cluster='foobar',
      run_target='foo',
      run_count=1,
      shard_count=1)
  created_commands = command_manager.CreateCommands(
      request_id=self.request.key.id(),
      command_infos=[command_info],
      shard_indexes=list(range(1)))
  command = created_commands[0]

  # Backdate the last update to twice the maximum inactive time.
  idle_time = datetime.timedelta(
      minutes=command_monitor.MAX_COMMAND_INACTIVE_TIME_MIN) * 2
  command.state = common.CommandState.QUEUED
  command.update_time = now - idle_time
  command.put()

  command_monitor.SyncCommand(command.request_id, command.key.id())

  same_command = hamcrest.match_equality(
      hamcrest.has_property('key', command.key))
  mock_ensure.assert_called_once_with(same_command)
  stored_command = command.key.get()
  self.assertEqual(common.CommandState.CANCELED, stored_command.state)
  stored_request = self.request.key.get()
  self.assertEqual(common.RequestState.CANCELED, stored_request.state)
  sync.assert_not_called()
def _CreateAttempt(self, attempt_id, task_id, state):
  """Creates a fresh command under self.request and stores an attempt for it.

  Args:
    attempt_id: id used for the attempt key and the attempt_id field.
    task_id: value stored in the attempt's task_id field.
    state: value stored in the attempt's state field.

  Returns:
    The stored datastore_entities.CommandAttempt.
  """
  command_info = datastore_entities.CommandInfo(
      command_line='long command line',
      run_target='foo',
      run_count=1,
      shard_count=1,
      cluster='foobar')
  plugin_data = {
      'ants_invocation_id': 'i123',
      'ants_work_unit_id': 'w123'
  }
  created_commands = command_manager.CreateCommands(
      request_id=self.request.key.id(),
      command_infos=[command_info],
      shard_indexes=list(range(1)),
      request_plugin_data=plugin_data)
  command = created_commands[0]
  _, request_id, _, command_id = command.key.flat()

  # The attempt key is nested under its request and command.
  key_path = (
      datastore_entities.Request, request_id,
      datastore_entities.Command, command_id,
      datastore_entities.CommandAttempt, attempt_id,
  )
  attempt = datastore_entities.CommandAttempt(
      key=ndb.Key(*key_path, namespace=common.NAMESPACE),
      attempt_id=attempt_id,
      state=state,
      command_id=command_id,
      task_id=task_id)
  attempt.put()
  return attempt
def testEnqueueCommandEvents_multipleEvents(self):
  """Four enqueued events across two commands leave both attempts COMPLETED."""
  request_info = datastore_entities.CommandInfo(
      command_line="command_line",
      cluster="cluster",
      run_target="run_target",
      shard_count=2)
  self.request = request_manager.CreateRequest(
      request_id="9999",
      user="******",
      command_infos=[request_info])

  command_infos = [
      datastore_entities.CommandInfo(
          command_line=command_line,
          cluster="foobar",
          run_target="foo",
          run_count=1,
          shard_count=2)
      for command_line in ("long command line 0", "long command line 1")
  ]
  command_1, command_2 = command_manager.CreateCommands(
      request_id=self.request.key.id(),
      command_infos=command_infos,
      shard_indexes=list(range(2)))
  _, request_id, _, command_1_id = command_1.key.flat()
  _, _, _, command_2_id = command_2.key.flat()

  for command in (command_1, command_2):
    command_event_test_util.CreateCommandAttempt(
        command, "aid", common.CommandState.QUEUED)

  # Both "started" events come first, followed by both "completed" events.
  events = [
      command_event_test_util.CreateTestCommandEventJson(
          request_id, command_id, "aid", event_type)
      for event_type in ("InvocationStarted", "InvocationCompleted")
      for command_id in (command_1_id, command_2_id)
  ]
  command_event_handler.EnqueueCommandEvents(events)

  tasks = self.mock_task_scheduler.GetTasks()
  self.assertEqual(len(tasks), 4)
  for task in tasks:
    self.testapp.post(
        command_event_handler.COMMAND_EVENT_HANDLER_PATH, task.payload)

  for command_id in (command_1_id, command_2_id):
    command_attempts = command_manager.GetCommandAttempts(
        request_id, command_id)
    self.assertEqual(len(command_attempts), 1)
    self.assertEqual(common.CommandState.COMPLETED,
                     command_attempts[0].state)
def testSyncCommand_withCustomQueueTimeout(self, mock_ensure, sync):
  """With a doubled queue timeout only the command idle for 3x is canceled.

  Args:
    mock_ensure: mock expected to be called with each of the two commands.
    sync: mock expected to be called once, with the first command.
  """
  datastore_entities.Command.update_time._auto_now = False
  now = datetime.datetime.utcnow()
  command_infos = [
      datastore_entities.CommandInfo(
          command_line=command_line,
          shard_count=2,
          run_target='foo',
          run_count=1,
          cluster='foobar')
      for command_line in ('long command line', 'longer_command_line')
  ]
  command_1, command_2 = command_manager.CreateCommands(
      request_id=self.request_2.key.id(),
      command_infos=command_infos,
      shard_indexes=list(range(2)),
      queue_timeout_seconds=(
          command_monitor.MAX_COMMAND_INACTIVE_TIME_MIN * 2 * 60))

  # Change update times. command_1 should ensure leasable, command_2 should
  # ensure leasable and cancel afterwards
  command_1.state = common.CommandState.QUEUED
  idle_time_1 = datetime.timedelta(
      minutes=command_monitor.MAX_COMMAND_INACTIVE_TIME_MIN)
  command_1.update_time = now - idle_time_1
  command_1.put()

  command_2.state = common.CommandState.QUEUED
  idle_time_2 = datetime.timedelta(
      minutes=command_monitor.MAX_COMMAND_INACTIVE_TIME_MIN) * 3
  command_2.update_time = now - idle_time_2
  command_2.put()

  command_monitor.SyncCommand(command_1.request_id, command_1.key.id())
  command_monitor.SyncCommand(command_2.request_id, command_2.key.id())

  expected_ensure_calls = [
      mock.call(
          hamcrest.match_equality(hamcrest.has_property('key', command.key)))
      for command in (command_1, command_2)
  ]
  mock_ensure.assert_has_calls(expected_ensure_calls)

  self.assertEqual(common.CommandState.QUEUED, command_1.key.get().state)
  self.assertEqual(common.CommandState.CANCELED, command_2.key.get().state)
  stored_request = self.request_2.key.get()
  self.assertEqual(common.RequestState.CANCELED, stored_request.state)
  sync.assert_called_once_with(command_1)
def testBackfillCommands(self, mock_add):
  """BackfillCommands re-adds the two QUEUED commands but not the RUNNING one.

  Args:
    mock_add: mock expected to be called exactly twice, once per QUEUED
      command, in any order.
  """
  command_infos = [
      datastore_entities.CommandInfo(
          command_line=command_line,
          shard_count=3,
          run_target='foo',
          run_count=1,
          cluster='foobar')
      for command_line in (
          'long command line', 'longer_command_line', 'short_cmd')
  ]
  plugin_data = {
      'ants_invocation_id': 'i123',
      'ants_work_unit_id': 'w123'
  }
  command_1, command_2, command_3 = command_manager.CreateCommands(
      request_id=self.request.key.id(),
      command_infos=command_infos,
      shard_indexes=list(range(3)),
      request_plugin_data=plugin_data)

  states = (
      (command_1, common.CommandState.QUEUED),
      (command_2, common.CommandState.QUEUED),
      (command_3, common.CommandState.RUNNING),
  )
  for command, state in states:
    command.state = state
    command.put()

  response = self.testapp.post_json(
      '/_ah/api/CoordinatorApi.BackfillCommands', {})
  self.assertEqual('200 OK', response.status)

  expected_calls = [
      mock.call(
          hamcrest.match_equality(hamcrest.has_property('key', command.key)))
      for command in (command_1, command_2)
  ]
  mock_add.assert_has_calls(expected_calls, any_order=True)
  self.assertEqual(2, mock_add.call_count)
def testCheckPendingCommands_canceledRequest(self, schedule_tasks, monitor):
  """Pending commands of a CANCELED request are neither scheduled nor monitored.

  Args:
    schedule_tasks: mock expected not to be called.
    monitor: mock expected not to be called.
  """
  request_id = "1001"
  command_infos = []
  for i in range(10):
    command_infos.append(
        datastore_entities.CommandInfo(
            command_line="command_line %04d" % i,
            cluster="cluster %04d" % i,
            run_target="run_target %04d" % i,
            run_count=1,
            shard_count=1))
  request = datastore_test_util.CreateRequest(
      request_id=request_id,
      user="******",
      command_infos=command_infos,
      max_concurrent_tasks=5,
      plugin_data={
          "FOO": "foo",
          "BAR": "'bar",
      })
  command_manager.CreateCommands(
      request_id=request_id,
      command_infos=command_infos,
      priority=request.priority,
      shard_indexes=[0] * len(command_infos))
  request.state = common.RequestState.CANCELED
  request.put()

  # First two commands COMPLETED, next three QUEUED, the remainder UNKNOWN.
  stored_commands = command_manager.GetCommands(request_id)
  for index, command in enumerate(stored_commands):
    if index >= 5:
      command.state = common.CommandState.UNKNOWN
    elif index >= 2:
      command.state = common.CommandState.QUEUED
    else:
      command.state = common.CommandState.COMPLETED
    command.put()

  summary = request_manager.RequestSummary()
  summary.completed_count = 2
  summary.queued_count = 3
  summary.pending_count = 5

  commander._CheckPendingCommands(request, summary)

  schedule_tasks.assert_not_called()
  monitor.assert_not_called()
def _CreateCommand(self,
                   request_id=REQUEST_ID,
                   run_count=1,
                   priority=None,
                   command_line="command_line1"):
  """Creates a single one-shard command and returns it.

  Args:
    request_id: id of the request the command is created under.
    run_count: run count stored in the command info.
    priority: priority forwarded to command_manager.CreateCommands.
    command_line: command line stored in the command info.

  Returns:
    The first command returned by command_manager.CreateCommands.
  """
  command_info = datastore_entities.CommandInfo(
      command_line=command_line,
      cluster="cluster",
      run_target="run_target",
      run_count=run_count,
      shard_count=1)
  plugin_data = {
      "ants_invocation_id": "i123",
      "command_ants_work_unit_id": "w123"
  }
  created_commands = command_manager.CreateCommands(
      request_id=request_id,
      command_infos=[command_info],
      priority=priority,
      shard_indexes=[0],
      request_plugin_data=plugin_data)
  return created_commands[0]
def testSyncCommand_runningState_doNotAddToQueue(self, mock_ensure, sync):
  """SyncCommand called with False leaves a RUNNING command untouched.

  Args:
    mock_ensure: mock expected not to be called.
    sync: mock expected not to be called.
  """
  datastore_entities.Command.update_time._auto_now = False
  now = datetime.datetime.utcnow()
  command_info = datastore_entities.CommandInfo(
      command_line='long command line',
      cluster='foobar',
      run_target='foo',
      run_count=1,
      shard_count=1)
  created_commands = command_manager.CreateCommands(
      request_id=self.request.key.id(),
      command_infos=[command_info],
      shard_indexes=list(range(1)))
  command = created_commands[0]

  # Backdate the last update to twice the maximum inactive time.
  idle_time = datetime.timedelta(
      minutes=command_monitor.MAX_COMMAND_INACTIVE_TIME_MIN) * 2
  command.state = common.CommandState.RUNNING
  command.update_time = now - idle_time
  command.put()

  command_monitor.SyncCommand(command.request_id, command.key.id(), False)

  mock_ensure.assert_not_called()
  stored_command = command.key.get()
  self.assertEqual(common.CommandState.RUNNING, stored_command.state)
  sync.assert_not_called()
def testProcessCommandEvent_pendingCommands(self, attempt_metric, monitor):
  """Completing one of five queued commands promotes one pending command.

  Args:
    attempt_metric: mock expected to be called once for the completed
      attempt.
    monitor: mock expected to be called once with the newly queued command.
  """
  # Test ProcessCommandEvent for a non-final state with deletion
  request_id = "1001"
  command_infos = []
  for i in range(10):
    command_infos.append(
        datastore_entities.CommandInfo(
            command_line="command_line %04d" % i,
            cluster="cluster %04d" % i,
            run_target="run_target %04d" % i,
            run_count=1,
            shard_count=1))
  request = datastore_test_util.CreateRequest(
      request_id=request_id,
      user="******",
      command_infos=command_infos,
      max_concurrent_tasks=5,
      plugin_data={
          "FOO": "foo",
          "BAR": "'bar",
      })
  commands = command_manager.CreateCommands(
      request_id=request_id,
      command_infos=command_infos,
      priority=request.priority,
      shard_indexes=[0] * len(command_infos))
  command_manager.ScheduleTasks(commands[:5])
  first_command = commands[0]
  _, request_id, _, command_id = first_command.key.flat()

  # Before the event: five commands still pending, five queued.
  pending_commands = command_manager.GetCommands(
      request_id, common.CommandState.UNKNOWN)
  self.assertEqual(5, len(pending_commands))
  queued_commands = command_manager.GetCommands(
      request_id, common.CommandState.QUEUED)
  self.assertEqual(5, len(queued_commands))

  tasks = command_manager.GetActiveTasks(first_command)
  self.assertEqual(1, len(tasks))
  active_task = tasks[0]
  command_task_store.LeaseTask(active_task.task_id)
  command_event_test_util.CreateCommandAttempt(
      first_command, "attempt0", common.CommandState.UNKNOWN,
      task=active_task)
  event = command_event_test_util.CreateTestCommandEvent(
      request_id,
      command_id,
      "attempt0",
      common.InvocationEventType.INVOCATION_COMPLETED,
      task=active_task,
      time=TIMESTAMP)

  commander.ProcessCommandEvent(event)

  tasks = command_manager.GetActiveTasks(first_command)
  self.assertEqual(0, len(tasks))
  command = first_command.key.get(use_cache=False)
  self.assertEqual(common.CommandState.COMPLETED, command.state)
  attempt_metric.assert_called_once_with(
      cluster_id=command.cluster,
      run_target=command.run_target,
      hostname="hostname",
      state="COMPLETED")

  # The first previously pending command should now be monitored and queued.
  next_command = pending_commands[0]
  monitor.assert_called_once_with([next_command])
  next_command = pending_commands[0].key.get(use_cache=False)
  self.assertEqual(common.CommandState.QUEUED, next_command.state)

  # After the event: four pending, five queued, one completed.
  pending_commands = command_manager.GetCommands(
      request_id, common.CommandState.UNKNOWN)
  self.assertEqual(4, len(pending_commands))
  queued_commands = command_manager.GetCommands(
      request_id, common.CommandState.QUEUED)
  self.assertEqual(5, len(queued_commands))
  completed_commands = command_manager.GetCommands(
      request_id, common.CommandState.COMPLETED)
  self.assertEqual(1, len(completed_commands))
def _CreateCommands(request):
  """Expands a request's command infos into per-shard commands and stores them.

  Every command info is validated, has a fixed set of options stripped from
  its command line, and is expanded into one CommandInfo per shard. The
  expanded infos are then passed to command_manager.CreateCommands. When the
  request carries a previous test context, it is applied to every created
  command.

  Args:
    request: request object; its command_infos, type, key, plugin_data,
      priority, queue_timeout_seconds and prev_test_context are read.

  Returns:
    The commands returned by command_manager.CreateCommands.

  Raises:
    ValueError: if a command info has no cluster, no run target, a run count
      below 1, or a shard count outside [1, max shards for its run target].
  """
  per_shard_infos = []
  per_shard_indexes = []
  for info in request.command_infos:
    if info.cluster is None:
      raise ValueError("cluster is not specified.")
    if not info.run_target:
      raise ValueError("run target is not defined.")
    # TODO: Check in db to see that it is a valid run target.
    if info.run_count < 1:
      raise ValueError("run count must be equal or greater than 1.")

    shard_limit = RUN_TARGET_TO_MAX_SHARDS_MAP.get(
        info.run_target, DEFAULT_MAX_SHARDS)
    shard_count_in_range = 0 < info.shard_count <= shard_limit
    if not shard_count_in_range:
      raise ValueError("shard count %d is outside of range [1, %d]" %
                       (info.shard_count, shard_limit))
    # TODO: Move validity check to request_manager.

    stripped_options = [
        # TFC-specific options
        "--cluster",
        "--run-target",
        "--run-count",
        # TF conflicting options
        "--loop",  # causes TF to loop test runs continuously
        "--product-type",  # causes TF to fail device allocations
        "--test-iterations",  # specifies the number of iterations to run
    ]
    command_line = command_util.CommandLine(info.command_line)
    command_line.RemoveOptions(stripped_options)

    # Schedule commands and tag them with a run_target.
    # TF implicitly knows how to map a device to a run_target string. When
    # fetching commands, TF looks for only commands tagged with run_targets
    # which are available on itself.
    for shard_index in range(info.shard_count):
      # If the request is unmanaged, use command line to inject shard
      # parameters. If local sharding was defined (a shard count without a
      # shard index), keep the original shard setup.
      if not request.type and (
          command_line.GetOption("--shard-count") is None or
          command_line.GetOption("--shard-index") is not None):
        command_line.RemoveOptions(["--shard-count", "--shard-index"])
        if info.shard_count > 1:
          command_line.AddOption("--shard-count", str(info.shard_count))
          command_line.AddOption("--shard-index", str(shard_index))

      shard_info = datastore_entities.CommandInfo(
          name=info.name,
          command_line=command_line.ToTFString(),
          cluster=info.cluster,
          run_target=info.run_target,
          run_count=info.run_count,
          shard_count=info.shard_count)
      per_shard_infos.append(shard_info)
      per_shard_indexes.append(shard_index)

  commands = command_manager.CreateCommands(
      request_id=request.key.id(),
      request_plugin_data=request.plugin_data,
      command_infos=per_shard_infos,
      shard_indexes=per_shard_indexes,
      priority=request.priority,
      queue_timeout_seconds=request.queue_timeout_seconds,
      request_type=request.type)

  if not request.prev_test_context:
    return commands
  for command in commands:
    command_manager.UpdateTestContext(
        request_id=request.key.id(),
        command_id=command.key.id(),
        test_context=request.prev_test_context)
  return commands