def test_midstreamSchedule_taskAlreadyRunning(self):
    """
    Single machine that is occupied by an in-flight task for another 200
    ticks. The scheduler must not place new work on that machine before
    the in-flight task has finished.

    Note: the in-flight task has no dependency relationship with any other
    task that might be scheduled in this test.
    """
    machines = [Machine(0)]
    onlyMachine = machines[0]
    configuration = SystemConfiguration(machines)

    # The task that still has to be scheduled.
    pendingTask = scheduling.Task(0, {onlyMachine: 300})
    pendingJob = Job(0, [pendingTask], 1)
    pendingQuery = Query(0, [JobDag(JobNode(pendingJob, []))])

    # The task that is already executing on the machine.
    busyTask = scheduling.Task(1, {onlyMachine: 1000})
    busyJob = Job(1, [busyTask], 1)
    busyQuery = Query(1, [JobDag(JobNode(busyJob, []))])

    queue = [pendingQuery, busyQuery]
    # 200 ticks of the in-flight task remain on the only machine.
    inFlight = [RunningTask(busyTask, onlyMachine, 200)]
    systemState = SystemState(configuration, 0, queue, inFlight, [])

    schedule = OnlineGreedyScheduler().handleNewQuery(systemState)

    # The pending task may only start once the machine frees up at tick 200.
    expected = Schedule([ScheduledTask(pendingTask, onlyMachine, 200)])
    matches = schedule.approxEquals(expected, EPSILON)
    self.assertTrue(matches, "actual=%s" % schedule)
def test_initialSchedule_2machines(self):
    """
    The scheduler is asked for a schedule when no tasks are currently
    running and there are two queries in the queue.

    Query 0 has 1 job with 1 task, which will take 200 ticks on machine 0
    and 300 ticks on machine 1.
    Query 1 has 1 job with 1 task, which will take 300 ticks on machine 0
    and 200 ticks on machine 1.

    The scheduler should schedule both tasks from the start, on different
    machines (each on the machine where it needs only 200 ticks).
    """
    machines = [Machine(0), Machine(1)]
    configuration = SystemConfiguration(machines)

    query0Task0 = scheduling.Task(0, {machines[0]: 200, machines[1]: 300})
    query0 = Query(0, [JobDag(JobNode(Job(0, [query0Task0], 1), []))])

    # This task gets its own ID (1) instead of reusing 0, so the two tasks
    # remain distinguishable wherever tasks are identified by their ID.
    query1Task0 = scheduling.Task(1, {machines[0]: 300, machines[1]: 200})
    query1 = Query(1, [JobDag(JobNode(Job(1, [query1Task0], 1), []))])

    queue = [query0, query1]
    # Time 0, nothing running, nothing completed.
    systemState = SystemState(configuration, 0, queue, [], [])

    scheduler = OnlineGreedyScheduler()
    schedule = scheduler.handleNewQuery(systemState)

    self.assertTrue(
        schedule.approxEquals(
            Schedule([
                ScheduledTask(query0Task0, machines[0], 0),
                ScheduledTask(query1Task0, machines[1], 0)
            ]), EPSILON), "actual=%s" % schedule)
def test_initialSchedule_simple(self):
    """
    Initial scheduling with an idle system, one machine, and two queued
    queries:

    * query 0: 1 job with 1 task needing 1000 ticks;
    * query 1: 1 job with 1 task needing 200 ticks.

    Expected outcome: query 1's task runs first, and query 0's task starts
    right after it.
    """
    machines = [Machine(0)]
    onlyMachine = machines[0]
    configuration = SystemConfiguration(machines)

    longTask = scheduling.Task(0, {onlyMachine: 1000})
    longJob = Job(0, [longTask], 1)
    longQuery = Query(0, [JobDag(JobNode(longJob, []))])

    shortTask = scheduling.Task(1, {onlyMachine: 200})
    shortJob = Job(1, [shortTask], 1)
    shortQuery = Query(1, [JobDag(JobNode(shortJob, []))])

    queue = [longQuery, shortQuery]
    systemState = SystemState(configuration, 0, queue, [], [])

    schedule = OnlineGreedyScheduler().handleNewQuery(systemState)

    # Short task at tick 0, long task once the short one ends at tick 200.
    expected = Schedule([
        ScheduledTask(shortTask, onlyMachine, 0),
        ScheduledTask(longTask, onlyMachine, 200),
    ])
    matches = schedule.approxEquals(expected, EPSILON)
    self.assertTrue(matches, "actual=%s" % schedule)
def test_initialSchedule_2machines_2tasks(self):
    """
    Initial scheduling with an idle system, two machines, and two queued
    queries:

    * query 0: 1 job with 2 tasks, one needing about 200 ticks and the
      other about 1200 ticks on any machine;
    * query 1: 1 job with 2 tasks, each needing about 1000 ticks on any
      machine.

    The durations are perturbed by one tick per machine so that a unique
    best solution exists.

    Expected outcome: query 1's tasks run first and query 0's tasks follow,
    because that ordering minimizes the average finishing time.
    """
    machines = [Machine(0), Machine(1)]
    machineA, machineB = machines[0], machines[1]
    configuration = SystemConfiguration(machines)

    q0Short = scheduling.Task(0, {machineA: 200, machineB: 201})
    q0Long = scheduling.Task(1, {machineA: 1201, machineB: 1200})
    q0Job = Job(0, [q0Short, q0Long], 2)
    q0 = Query(0, [JobDag(JobNode(q0Job, []))])

    q1First = scheduling.Task(2, {machineA: 1000, machineB: 1001})
    q1Second = scheduling.Task(3, {machineA: 1001, machineB: 1000})
    q1Job = Job(1, [q1First, q1Second], 2)
    q1 = Query(1, [JobDag(JobNode(q1Job, []))])

    queue = [q0, q1]
    systemState = SystemState(configuration, 0, queue, [], [])

    schedule = OnlineGreedyScheduler().handleNewQuery(systemState)

    # Query 1 occupies both machines until tick 1000; query 0 follows.
    expected = Schedule([
        ScheduledTask(q1First, machineA, 0),
        ScheduledTask(q1Second, machineB, 0),
        ScheduledTask(q0Short, machineA, 1000),
        ScheduledTask(q0Long, machineB, 1000),
    ])
    matches = schedule.approxEquals(expected, EPSILON)
    self.assertTrue(matches, "actual=%s" % schedule)
def test_midstreamSchedule_taskAlreadyFinished(self):
    """
    One of the tasks of a queued query has already completed. The
    scheduler has to ignore it: the finished task must not be scheduled
    again, and it must not be treated as a dependency for the query.
    """
    machines = [Machine(0)]
    onlyMachine = machines[0]
    configuration = SystemConfiguration(machines)

    remainingTask = scheduling.Task(0, {onlyMachine: 300})
    finishedTask = scheduling.Task(1, {onlyMachine: 1000})
    # Both tasks belong to the same job of the same query.
    job = Job(0, [remainingTask, finishedTask], 2)
    queue = [Query(0, [JobDag(JobNode(job, []))])]

    # Nothing is running; the finished task is reported as completed.
    systemState = SystemState(configuration, 0, queue, [], [finishedTask])

    schedule = OnlineGreedyScheduler().handleNewQuery(systemState)

    # Only the unfinished task shows up, starting immediately.
    expected = Schedule([ScheduledTask(remainingTask, onlyMachine, 0)])
    matches = schedule.approxEquals(expected, EPSILON)
    self.assertTrue(matches, "actual=%s" % schedule)
def handleTask(task):
    """
    Return the task that should stand in for `task`, as a one-element list.

    `preexistingAssignments` is resolved from outside this function and is
    looked up by the task's ID:

    * If it has an entry for the ID, a new Task with the same ID is
      returned whose time table contains only the assignment's machine,
      mapped to the assignment's remaining time. The table is a
      defaultdict(int), so any other machine looks up as 0.
    * Otherwise the original task is returned unchanged.
    """
    taskId = task.getId()

    # The `in` operator replaces dict.has_key(), which no longer exists in
    # Python 3; `in` behaves the same on Python 2.
    if taskId not in preexistingAssignments:
        return [task]

    assignment = preexistingAssignments[taskId]
    remainingTimes = defaultdict(
        int, {assignment.getMachine(): assignment.getRemainingTime()})
    return [scheduling.Task(taskId, remainingTimes)]
def test_midstreamSchedule_nonzeroTime(self):
    """
    The system clock is at a nonzero time when the schedule is requested.
    The resulting schedule has to begin at the current time, not (for
    instance) at time 0. The rest of the setup is arbitrary.
    """
    currentTime = 100

    machines = [Machine(0)]
    onlyMachine = machines[0]
    configuration = SystemConfiguration(machines)

    task = scheduling.Task(0, {onlyMachine: 1000})
    job = Job(0, [task], 1)
    queue = [Query(0, [JobDag(JobNode(job, []))])]

    systemState = SystemState(configuration, currentTime, queue, [], [])

    schedule = OnlineGreedyScheduler().handleNewQuery(systemState)

    # The single task should start exactly at the current time.
    expected = Schedule([ScheduledTask(task, onlyMachine, 100)])
    matches = schedule.approxEquals(expected, EPSILON)
    self.assertTrue(matches, "actual=%s" % schedule)
def addFakeRoot(originalJobDag):
    """
    Build a new JobDag whose root is a placeholder job sitting on top of
    the root of `originalJobDag`.

    The placeholder job has ID None and holds a single task, also with ID
    None, whose time table is an empty defaultdict(int) (so every machine
    looks up as 0). The original root is passed as the only element of the
    list given to the new root's JobNode.
    """
    placeholderTask = scheduling.Task(None, defaultdict(int))
    placeholderJob = Job(None, [placeholderTask], 1)
    realRoot = originalJobDag.getRoot()
    placeholderNode = JobNode(placeholderJob, [realRoot])
    return JobDag(placeholderNode)