def test_valid_times(self):
    """Checks how valid_until is aggregated up the bucket hierarchy.

    The assertions expect each bucket to report the largest valid_until
    found among the servers below it, also after servers are removed.
    """
    top, left, right = [
        scheduler.Bucket(name, traits=_traits2int([name]))
        for name in ('top', 'left', 'right')
    ]

    # (server name, shared trait, valid_until)
    specs = (('a', '0', 1), ('b', '0', 2), ('y', '1', 3), ('z', '1', 4))
    srv_a, srv_b, srv_y, srv_z = [
        scheduler.Server(name, [10, 10],
                         traits=_traits2int([name, group]),
                         valid_until=expires)
        for name, group, expires in specs
    ]

    links = (
        (top, left), (top, right),
        (left, srv_a), (left, srv_b),
        (right, srv_y), (right, srv_z),
    )
    for parent, child in links:
        parent.add_node(child)

    def expect_valid_until(top_value, left_value, right_value):
        """Asserts valid_until of the three buckets, top first."""
        self.assertEqual(top.valid_until, top_value)
        self.assertEqual(left.valid_until, left_value)
        self.assertEqual(right.valid_until, right_value)

    expect_valid_until(4, 2, 4)

    # Removing the shortest lived server on the left changes nothing.
    left.remove_node_by_name('a')
    expect_valid_until(4, 2, 4)

    # Removing the longest lived server lowers both right and top.
    right.remove_node_by_name('z')
    expect_valid_until(3, 2, 3)
def test_affinity_counters(self):
    """Checks that affinity counters roll up from servers to buckets."""
    top, left, right = [
        scheduler.Bucket(name, traits=_traits2int([name]))
        for name in ('top', 'left', 'right')
    ]
    srv_a, srv_b, srv_y, srv_z = [
        scheduler.Server(name, [10, 10], traits=0, valid_until=500)
        for name in ('a', 'b', 'y', 'z')
    ]

    links = (
        (top, left), (top, right),
        (left, srv_a), (left, srv_b),
        (right, srv_y), (right, srv_z),
    )
    for parent, child in links:
        parent.add_node(child)

    app = app_list(10, 'app_a', 50, [1, 1])[0]

    # Placing on 'a' is counted on the server and on every ancestor.
    self.assertTrue(srv_a.put(app))
    self.assertEqual(1, srv_a.affinity_counters['app_a'])
    self.assertEqual(1, left.affinity_counters['app_a'])
    self.assertEqual(1, top.affinity_counters['app_a'])

    # Placing on 'z' (right bucket) leaves the left counter untouched.
    srv_z.put(app)
    self.assertEqual(1, srv_z.affinity_counters['app_a'])
    self.assertEqual(1, left.affinity_counters['app_a'])
    self.assertEqual(2, top.affinity_counters['app_a'])

    # Removing from 'a' decrements the server, left and top counters.
    srv_a.remove(app.name)
    self.assertEqual(0, srv_a.affinity_counters['app_a'])
    self.assertEqual(0, left.affinity_counters['app_a'])
    self.assertEqual(1, top.affinity_counters['app_a'])
def test_size_and_members(self):
    """Checks recursive size calculation and member enumeration."""
    top, left, right = [
        scheduler.Bucket(name, traits=_traits2int([name]))
        for name in ('top', 'left', 'right')
    ]

    # (server name, shared trait)
    specs = (('a', '0'), ('b', '0'), ('y', '1'), ('z', '1'))
    srv_a, srv_b, srv_y, srv_z = [
        scheduler.Server(name, [1, 1],
                         traits=_traits2int([name, group]),
                         valid_until=500)
        for name, group in specs
    ]

    links = (
        (top, left), (top, right),
        (left, srv_a), (left, srv_b),
        (right, srv_y), (right, srv_z),
    )
    for parent, child in links:
        parent.add_node(child)

    # pylint: disable=W0212
    expected_sizes = ((srv_a, [1, 1]), (left, [2, 2]), (top, [4, 4]))
    for node, expected in expected_sizes:
        self.assertTrue(scheduler._all_isclose(node.size(None), expected))

    self.assertEqual(dict(a=srv_a, b=srv_b, y=srv_y, z=srv_z),
                     top.members())
def test_labels(self):
    """Checks that apps are only placed on servers of their partition."""
    cell = scheduler.Cell('top')
    left = scheduler.Bucket('left', traits=0)
    right = scheduler.Bucket('right', traits=0)

    srv_a = scheduler.Server('a_xx', [10, 10], valid_until=500, label='xx')
    srv_b = scheduler.Server('b', [10, 10], valid_until=500)
    srv_y = scheduler.Server('y_xx', [10, 10], valid_until=500, label='xx')
    srv_z = scheduler.Server('z', [10, 10], valid_until=500)

    links = (
        (cell, left), (cell, right),
        (left, srv_a), (left, srv_b),
        (right, srv_y), (right, srv_z),
    )
    for parent, child in links:
        parent.add_node(child)

    app1 = scheduler.Application('app1', 4, [1, 1], 'app')
    app2 = scheduler.Application('app2', 3, [2, 2], 'app')
    app3 = scheduler.Application('app_xx_3', 2, [3, 3], 'app')
    app4 = scheduler.Application('app_xx_4', 1, [4, 4], 'app')

    # Two apps go to the unlabeled partition, two to partition 'xx'.
    assignments = ((None, app1), (None, app2), ('xx', app3), ('xx', app4))
    for label, app in assignments:
        cell.partitions[label].allocation.add(app)

    cell.schedule()

    unlabeled = ['b', 'z']
    labeled = ['a_xx', 'y_xx']
    expectations = (
        (app1, unlabeled), (app2, unlabeled),
        (app3, labeled), (app4, labeled),
    )
    for app, candidates in expectations:
        self.assertIn(app.server, candidates)
def test_serialization(self):
    """Builds and schedules a two-rack cell used for serialization checks."""
    # Disable pylint's too many statements warning.
    #
    # pylint: disable=R0915
    cell = scheduler.Cell('top')
    left = scheduler.Bucket('left', traits=0)
    right = scheduler.Bucket('right', traits=0)
    srv_a, srv_b, srv_y, srv_z = [
        scheduler.Server(name, [10, 10], traits=0, valid_until=500)
        for name in ('a', 'b', 'y', 'z')
    ]

    links = (
        (cell, left), (cell, right),
        (left, srv_a), (left, srv_b),
        (right, srv_y), (right, srv_z),
    )
    for parent, child in links:
        parent.add_node(child)

    for rack in (left, right):
        rack.level = 'rack'

    apps = app_list(10, 'app', 50, [1, 1],
                    affinity_limits={'server': 1, 'rack': 1})

    # Only the first four apps are registered with the cell.
    for app in apps[:4]:
        cell.add_app(cell.partitions[None].allocation, app)

    cell.schedule()
def _construct_cell():
    """Builds a test cell with two racks, four servers and nested allocations.

    :returns: the populated ``scheduler.Cell``.
    """
    cell = scheduler.Cell('top')

    rack1 = scheduler.Bucket('rack:rack1', traits=0, level='rack')
    rack2 = scheduler.Bucket('rack:rack2', traits=0, level='rack')
    for rack in (rack1, rack2):
        cell.add_node(rack)

    # (owning rack, server name, traits, valid_until)
    specs = (
        (rack1, 'srv1', 3, 1000),
        (rack1, 'srv2', 7, 2000),
        (rack2, 'srv3', 0, 3000),
        (rack2, 'srv4', 0, 4000),
    )
    placed = [
        (rack, scheduler.Server(name, [10, 20, 30],
                                traits=traits, valid_until=expires))
        for rack, name, traits, expires in specs
    ]
    for rack, server in placed:
        rack.add_node(server)

    tenant1 = scheduler.Allocation()
    tenant2 = scheduler.Allocation()
    tenant3 = scheduler.Allocation()
    alloc1 = scheduler.Allocation([10, 10, 10], rank=100, traits=0)
    alloc2 = scheduler.Allocation([10, 10, 10], rank=100, traits=3)

    # Resulting tree: t1 -> t3 -> a2 and t2 -> a1.
    cell.partitions[None].allocation.add_sub_alloc('t1', tenant1)
    cell.partitions[None].allocation.add_sub_alloc('t2', tenant2)
    tenant1.add_sub_alloc('t3', tenant3)
    tenant2.add_sub_alloc('a1', alloc1)
    tenant3.add_sub_alloc('a2', alloc2)

    return cell
def test_restore(self):
    """Checks placement against server expiry and app lease."""
    cell = scheduler.Cell('top')

    big = scheduler.Server('large', [10, 10], traits=0, valid_until=200)
    cell.add_node(big)

    little = scheduler.Server('small', [3, 3], traits=0, valid_until=1000)
    cell.add_node(little)

    fitting = app_list(1, 'app', 50, [6, 6], lease=50)
    for app in fitting:
        cell.add_app(cell.partitions[None].allocation, app)

    # 100 sec left, app lease is 50, should fit.
    time.time.return_value = 100
    cell.schedule()
    self.assertEqual(fitting[0].server, 'large')

    # At 190 the second app is expected to stay unplaced.
    time.time.return_value = 190
    not_fitting = app_list(1, 'app-not-fit', 90, [6, 6], lease=50)
    for app in not_fitting:
        cell.add_app(cell.partitions[None].allocation, app)

    cell.schedule()
    self.assertIsNone(not_fitting[0].server)
    self.assertEqual(fitting[0].server, 'large')
def test_bucket_placement(self):
    """Tests placement strategies.

    Places apps through the top bucket using the default strategy, then
    switches bucket A to ``scheduler.PackStrategy`` for the 'app2' affinity
    and checks the resulting distribution across servers.
    """
    top = scheduler.Bucket('top')

    a_bucket = scheduler.Bucket('a_bucket')
    top.add_node(a_bucket)

    b_bucket = scheduler.Bucket('b_bucket')
    top.add_node(b_bucket)

    a1_srv = scheduler.Server('a1_srv', [10, 10], valid_until=500)
    a_bucket.add_node(a1_srv)
    a2_srv = scheduler.Server('a2_srv', [10, 10], valid_until=500)
    a_bucket.add_node(a2_srv)

    b1_srv = scheduler.Server('b1_srv', [10, 10], valid_until=500)
    b_bucket.add_node(b1_srv)
    b2_srv = scheduler.Server('b2_srv', [10, 10], valid_until=500)
    b_bucket.add_node(b2_srv)

    # bunch of apps with the same affinity
    apps1 = app_list(10, 'app1', 50, [1, 1])
    apps2 = app_list(10, 'app2', 50, [1, 1])

    # Default strategy is spread, so placing 4 apps1 will result in each
    # node having one app.
    self.assertTrue(top.put(apps1[0]))
    self.assertTrue(top.put(apps1[1]))
    self.assertTrue(top.put(apps1[2]))
    self.assertTrue(top.put(apps1[3]))

    # from top level, it will spread between a and b buckets, so first
    # two apps go to a1_srv, b1_srv respectively.
    #
    # 3rd app - buckets rotate, and a bucket is preferred again. Inside the
    # bucket, next node is chosen. Same for 4th app.
    #
    # The result is that after 4 placements they are spread evenly.
    #
    self.assertEqual(1, len(a1_srv.apps))
    self.assertEqual(1, len(a2_srv.apps))
    self.assertEqual(1, len(b1_srv.apps))
    self.assertEqual(1, len(b2_srv.apps))

    a_bucket.set_affinity_strategy('app2', scheduler.PackStrategy)

    self.assertTrue(top.put(apps2[0]))
    self.assertTrue(top.put(apps2[1]))
    self.assertTrue(top.put(apps2[2]))
    self.assertTrue(top.put(apps2[3]))

    # B bucket still uses spread strategy.
    self.assertEqual(2, len(b1_srv.apps))
    self.assertEqual(2, len(b2_srv.apps))

    # Without predicting exact placement, apps will be placed on one of
    # the servers in A bucket but not the other, as they use pack strategy.
    # assertNotEqual replaces the deprecated assertNotEquals alias.
    self.assertNotEqual(len(a1_srv.apps), len(a2_srv.apps))
def test_node_features(self):
    """Checks feature propagation between buckets and servers.

    Buckets are expected to see the features of the servers below them,
    and servers the features of their ancestors.
    """
    top, left, right = [
        scheduler.Bucket(name, features=[name])
        for name in ('top', 'left', 'right')
    ]

    # (server name, shared feature)
    specs = (('a', '0'), ('b', '0'), ('y', '1'), ('z', '1'))
    srv_a, srv_b, srv_y, srv_z = [
        scheduler.Server(name, [10, 10],
                         features=[name, group], valid_until=500)
        for name, group in specs
    ]

    links = (
        (top, left), (top, right),
        (left, srv_a), (left, srv_b),
        (right, srv_y), (right, srv_z),
    )
    for parent, child in links:
        parent.add_node(child)

    def check(node, expectations):
        """Asserts, in order, whether node has each listed feature."""
        for feature, present in expectations:
            verify = self.assertTrue if present else self.assertFalse
            verify(node.features.has(feature))

    check(top, [('a', True), ('b', True), ('0', True),
                ('y', True), ('z', True), ('1', True)])
    check(left, [('a', True), ('b', True), ('0', True),
                 ('y', False), ('z', False), ('1', False)])
    check(srv_a, [('top', True), ('left', True), ('right', False)])

    # '0' is still provided by server 'b' after 'a' is gone.
    left.remove_node('a')
    check(left, [('a', False), ('b', True), ('0', True)])
    check(top, [('a', False), ('b', True), ('0', True)])

    left.remove_node('b')
    check(left, [('b', False), ('0', False)])
    check(top, [('b', False), ('0', False)])
def test_node_traits(self):
    """Checks that server traits are visible on the enclosing buckets."""
    top, left, right = [
        scheduler.Bucket(name, traits=_traits2int([name]))
        for name in ('top', 'left', 'right')
    ]

    # (server name, shared trait)
    specs = (('a', '0'), ('b', '0'), ('y', '1'), ('z', '1'))
    srv_a, srv_b, srv_y, srv_z = [
        scheduler.Server(name, [10, 10],
                         traits=_traits2int([name, group]),
                         valid_until=500)
        for name, group in specs
    ]

    links = (
        (top, left), (top, right),
        (left, srv_a), (left, srv_b),
        (right, srv_y), (right, srv_z),
    )
    for parent, child in links:
        parent.add_node(child)

    def check(node, expectations):
        """Asserts, in order, whether node has each listed trait."""
        for trait, present in expectations:
            verify = self.assertTrue if present else self.assertFalse
            verify(node.traits.has(_trait2int(trait)))

    check(top, [('a', True), ('b', True), ('0', True),
                ('y', True), ('z', True), ('1', True)])
    check(left, [('a', True), ('b', True), ('0', True),
                 ('y', False), ('z', False), ('1', False)])

    # '0' is still provided by server 'b' after 'a' is gone.
    left.remove_node_by_name('a')
    check(left, [('a', False), ('b', True), ('0', True)])
    check(top, [('a', False), ('b', True), ('0', True)])

    left.remove_node_by_name('b')
    check(left, [('b', False), ('0', False)])
    check(top, [('b', False), ('0', False)])
def test_reschedule(self):
    """Checks placements written by the master on reschedule.

    Also verifies the compressed JSON placement summary stored under
    '/placement' once a server goes down.
    """
    servers = [
        scheduler.Server(name, [10, 10, 10], valid_until=1000, traits=0)
        for name in ('1', '2', '3', '4')
    ]
    cell = self.master.cell
    for server in servers:
        cell.add_node(server)

    app1 = scheduler.Application('app1', 4, [1, 1, 1], 'app')
    app2 = scheduler.Application('app2', 3, [2, 2, 2], 'app')
    for app in (app1, app2):
        cell.add_app(cell.partitions[None].allocation, app)

    # At this point app1 is on server 1, app2 on server 2.
    self.master.reschedule()
    treadmill.zkutils.put.assert_has_calls(
        [
            mock.call(mock.ANY, '/placement/1/app1',
                      {'expires': 500, 'identity': None},
                      acl=mock.ANY),
            mock.call(mock.ANY, '/placement/2/app2',
                      {'expires': 500, 'identity': None},
                      acl=mock.ANY),
        ],
        any_order=True
    )

    treadmill.zkutils.ensure_deleted.reset_mock()
    treadmill.zkutils.put.reset_mock()

    # Taking server 1 down is expected to move app1 to server 3.
    servers[0].state = scheduler.State.down
    self.master.reschedule()

    treadmill.zkutils.ensure_deleted.assert_has_calls(
        [mock.call(mock.ANY, '/placement/1/app1')]
    )
    treadmill.zkutils.put.assert_has_calls(
        [
            mock.call(mock.ANY, '/placement/3/app1',
                      {'expires': 500, 'identity': None},
                      acl=mock.ANY),
            mock.call(mock.ANY, '/placement', mock.ANY, acl=mock.ANY),
        ]
    )

    # Verify that placement data was properly saved as a compressed json.
    put_args, _put_kwargs = treadmill.zkutils.put.call_args_list[1]
    raw_placement = zlib.decompress(put_args[2])
    placement = json.loads(raw_placement.decode())

    self.assertIn(['app1', '1', 500, '3', 500], placement)
    self.assertIn(['app2', '2', 500, '2', 500], placement)
def test_renew(self):
    """Checks how the renew flag affects placement and placement expiry."""
    cell = scheduler.Cell('top')
    cell.add_node(
        scheduler.Server('a', [10, 10], traits=0, valid_until=1000)
    )

    apps = app_list(1, 'app', 50, [6, 6], lease=50)
    for item in apps:
        cell.add_app(cell.partitions[None].allocation, item)
    app = apps[0]

    def expect_placement(server, expiry):
        """Asserts the app's current server and placement expiry."""
        self.assertEqual(app.server, server)
        self.assertEqual(app.placement_expiry, expiry)

    cell.schedule()
    expect_placement('a', 60)

    # Without a renew request the expiry stays as it was.
    time.time.return_value = 100
    cell.schedule()
    expect_placement('a', 60)

    time.time.return_value = 200
    app.renew = True
    cell.schedule()
    expect_placement('a', 250)
    self.assertFalse(app.renew)

    # fast forward to 975, close to server 'a' expiration, app will
    # migrate to 'b' on renew.
    cell.add_node(
        scheduler.Server('b', [10, 10], traits=0, valid_until=2000)
    )

    time.time.return_value = 975
    app.renew = True
    cell.schedule()
    expect_placement('b', 1025)
    self.assertFalse(app.renew)

    # fast forward to 1975, when app can't be renewed on server b, but
    # there is no alternative placement.
    time.time.return_value = 1975
    app.renew = True
    cell.schedule()
    # Placement expiry did not change, as placement was not found.
    expect_placement('b', 1025)
    # Renew flag is not cleared, as new placement was not found.
    self.assertTrue(app.renew)
def test_reschedule(self):
    """Checks placements written by the master on reschedule."""
    servers = [
        scheduler.Server(name, [10, 10, 10], valid_until=1000, traits=0)
        for name in ('1', '2', '3', '4')
    ]
    cell = self.master.cell
    for server in servers:
        cell.add_node(server)

    app1 = scheduler.Application('app1', 4, [1, 1, 1], 'app')
    app2 = scheduler.Application('app2', 3, [2, 2, 2], 'app')
    for app in (app1, app2):
        cell.add_app(cell.partitions[None].allocation, app)

    # At this point app1 is on server 1, app2 on server 2.
    self.master.reschedule()
    initial_puts = [
        mock.call(mock.ANY, '/placement/1/app1',
                  {'expires': 500, 'identity': None},
                  acl=mock.ANY),
        mock.call(mock.ANY, '/placement/2/app2',
                  {'expires': 500, 'identity': None},
                  acl=mock.ANY),
    ]
    treadmill.zkutils.put.assert_has_calls(initial_puts)

    # Taking server 1 down is expected to move app1 to server 3.
    servers[0].state = scheduler.State.down
    self.master.reschedule()

    treadmill.zkutils.ensure_deleted.assert_has_calls(
        [mock.call(mock.ANY, '/placement/1/app1')]
    )
    moved_puts = [
        mock.call(mock.ANY, '/placement/3/app1',
                  {'expires': 500, 'identity': None},
                  acl=mock.ANY),
        mock.call(mock.ANY, '/placement', mock.ANY),
    ]
    treadmill.zkutils.put.assert_has_calls(moved_puts)
def prepareData(nodes_count, app_count, affinity):
    """Build a cell populated with racks, servers and applications.

    Servers are distributed over racks holding at most ``NODES_PER_RACK``
    servers each. Applications get a random priority between 0 and 5 and
    are added to the allocation of the partition keyed by ``None``.

    :param nodes_count: total number of servers to create.
    :param app_count: number of applications to create.
    :param affinity: callable that maps an application index to the
        affinity passed to ``scheduler.Application``.
    :returns: the populated ``scheduler.Cell``.
    """
    scheduler.DIMENSION_COUNT = 3

    cell = scheduler.Cell("local", labels=set([None]))

    num_racks = math.ceil(nodes_count / NODES_PER_RACK)
    counter_nodes = nodes_count
    for i in range(0, num_racks):
        rack = scheduler.Bucket('racks' + str(i), traits=0)
        cell.add_node(rack)
        # NOTE(review): server names only use the inner index, so they
        # repeat across racks - confirm this is intended.
        for j in range(0, NODES_PER_RACK):
            # Compare by value: "is 0" tests identity, which only works
            # by accident of CPython small-int caching and triggers a
            # SyntaxWarning on Python 3.8+.
            if counter_nodes == 0:
                break
            counter_nodes = counter_nodes - 1
            rack.add_node(
                scheduler.Server(
                    'node' + str(j),
                    resources({
                        "memory": "2G",
                        "disk": "20G",
                        "cpu": "90%"
                    }),
                    time.time() * 2))

    for app_idx in range(0, app_count):
        prio = random.randint(0, 5)
        demand = resources({"memory": "1G", "disk": "10G", "cpu": "40%"})
        name = 'app_.%s' % (app_idx)
        app = scheduler.Application(name, prio, demand,
                                    affinity=affinity(app_idx))
        cell.partitions[None].allocation.add(app)

    return cell
def test_schedule_once(self):
    """Tests schedule once trait on server down.

    Two schedule_once apps are placed on different servers; one server is
    then marked down and the other removed. Both apps are expected to end
    up unplaced and flagged as evicted.
    """
    cell = scheduler.Cell('top')
    for idx in range(0, 10):
        server = scheduler.Server(str(idx), [10, 10], traits=0,
                                  valid_until=time.time() + 1000)
        cell.add_node(server)

    apps = app_list(2, 'app', 50, [6, 6], schedule_once=True)
    for app in apps:
        cell.add_app(cell.partitions[None].allocation, app)

    cell.schedule()

    self.assertNotEqual(apps[0].server, apps[1].server)
    # Check both apps; the original asserted apps[0] twice.
    self.assertFalse(apps[0].evicted)
    self.assertFalse(apps[1].evicted)

    cell.children_by_name[apps[0].server].state = scheduler.State.down
    cell.remove_node_by_name(apps[1].server)

    cell.schedule()
    self.assertIsNone(apps[0].server)
    self.assertTrue(apps[0].evicted)
    self.assertIsNone(apps[1].server)
    self.assertTrue(apps[1].evicted)
def _construct_cell(empty=False):
    """Builds a test cell with two racks and two partitions.

    :param empty: when true, return the bare cell without any nodes or
        allocations.
    :returns: the ``scheduler.Cell``.
    """
    cell = scheduler.Cell('top')
    if empty:
        return cell

    rack1 = scheduler.Bucket('rack:rack1', traits=0, level='rack')
    rack2 = scheduler.Bucket('rack:rack2', traits=0, level='rack')
    for rack in (rack1, rack2):
        cell.add_node(rack)

    # (owning rack, server name, traits, valid_until, partition label)
    specs = (
        (rack1, 'srv1', 1, 1000, 'part'),
        (rack1, 'srv2', 3, 2000, 'part'),
        (rack2, 'srv3', 0, 3000, '_default'),
        (rack2, 'srv4', 0, 4000, '_default'),
    )
    placed = [
        (rack, scheduler.Server(name, [10, 20, 30], traits=traits,
                                valid_until=expires, label=label))
        for rack, name, traits, expires, label in specs
    ]
    for rack, server in placed:
        rack.add_node(server)

    # '_default' partition: t1 -> t11 -> a1
    default_tenant = scheduler.Allocation()
    cell.partitions['_default'].allocation.add_sub_alloc(
        't1', default_tenant
    )
    nested_tenant = scheduler.Allocation()
    default_tenant.add_sub_alloc('t11', nested_tenant)
    default_leaf = scheduler.Allocation([10, 10, 10], rank=100, traits=0)
    nested_tenant.add_sub_alloc('a1', default_leaf)

    # 'part' partition: t2 -> a2
    part_tenant = scheduler.Allocation()
    cell.partitions['part'].allocation.add_sub_alloc('t2', part_tenant)
    part_leaf = scheduler.Allocation([10, 10, 10], rank=100, traits=3)
    part_tenant.add_sub_alloc('a2', part_leaf)

    return cell
def test_reschedule_maxutil(self):
    """Checks rescheduling with a max utilization cap on the allocation."""
    servers = [
        scheduler.Server(name, [10, 10, 10], valid_until=1000, traits=0)
        for name in ('1', '2', '3', '4')
    ]
    cell = self.master.cell
    for server in servers:
        cell.add_node(server)

    app1 = scheduler.Application('app1', 4, [1, 1, 1], 'app')
    app2 = scheduler.Application('app2', 3, [2, 2, 2], 'app')

    cell.partitions[None].allocation.set_reserved([1, 1, 1])
    cell.partitions[None].allocation.set_max_utilization(2)
    for app in (app1, app2):
        cell.add_app(cell.partitions[None].allocation, app)

    self.master.reschedule()
    treadmill.zkutils.put.assert_has_calls(
        [
            mock.call(mock.ANY, '/placement/1/app1',
                      {'expires': 500, 'identity': None},
                      acl=mock.ANY),
        ]
    )

    # Raising app2's priority is expected to swap it in for app1.
    app2.priority = 5
    self.master.reschedule()

    treadmill.zkutils.ensure_deleted.assert_has_calls(
        [mock.call(mock.ANY, '/placement/1/app1')]
    )
    treadmill.zkutils.put.assert_has_calls(
        [
            mock.call(mock.ANY, '/placement/2/app2',
                      {'expires': 500, 'identity': None},
                      acl=mock.ANY),
        ]
    )
def test_schedule_once_eviction(self):
    """Tests schedule once trait with eviction.

    Fills ten servers with schedule_once apps, forces an eviction with a
    higher priority app and checks that the evicted app is not placed
    again once the evicting app is removed.
    """
    cell = scheduler.Cell('top')
    # range works on both Python 2 and 3; xrange is Python 2 only.
    for idx in range(0, 10):
        server = scheduler.Server(str(idx), [10, 10], traits=0,
                                  valid_until=time.time() + 1000)
        cell.add_node(server)

    # Each server has capacity 10.
    #
    # Place two apps - capacity 1, capacity 8, they will occupy entire
    # server.
    #
    # Try and place app with demand of 5. First it will try to evict
    # small app, but it will not be enough, so it will evict large app.
    #
    # Check that evicted flag is set only for large app, and small app
    # will be restored.
    small_apps = app_list(10, 'small', 50, [1, 1], schedule_once=True)
    for app in small_apps:
        cell.add_app(cell.partitions[None].allocation, app)
    large_apps = app_list(10, 'large', 60, [8, 8], schedule_once=True)
    for app in large_apps:
        cell.add_app(cell.partitions[None].allocation, app)

    placement = cell.schedule()
    # Check that all apps are placed.
    app2server = {
        app: after
        for app, _, _, after, _ in placement
        if after is not None
    }
    self.assertEqual(len(app2server), 20)

    # Add one app, higher priority than rest, will force eviction.
    medium_apps = app_list(1, 'medium', 70, [5, 5])
    for app in medium_apps:
        cell.add_app(cell.partitions[None].allocation, app)

    cell.schedule()
    self.assertEqual(len([app for app in small_apps if app.evicted]), 0)
    self.assertEqual(len([app for app in small_apps if app.server]), 10)
    self.assertEqual(len([app for app in large_apps if app.evicted]), 1)
    self.assertEqual(len([app for app in large_apps if app.server]), 9)

    # Remove app, make sure the evicted app is not placed again.
    cell.remove_app(medium_apps[0].name)
    cell.schedule()

    self.assertEqual(len([app for app in small_apps if app.evicted]), 0)
    self.assertEqual(len([app for app in small_apps if app.server]), 10)
    self.assertEqual(len([app for app in large_apps if app.evicted]), 1)
    self.assertEqual(len([app for app in large_apps if app.server]), 9)
def test_reschedule_once(self):
    """Checks cleanup of a schedule_once app when its server goes down."""
    servers = [
        scheduler.Server(name, [10, 10, 10], valid_until=1000, features=[])
        for name in ('1', '2', '3', '4')
    ]
    cell = self.master.cell
    for server in servers:
        cell.add_node(server)

    app1 = scheduler.Application('app1', 4, [1, 1, 1], 'app',
                                 schedule_once=True)
    app2 = scheduler.Application('app2', 3, [2, 2, 2], 'app')
    for app in (app1, app2):
        cell.add_app(cell.allocation, app)

    # At this point app1 is on server 1, app2 on server 2.
    self.master.reschedule()
    treadmill.zkutils.put.assert_has_calls(
        [
            mock.call(mock.ANY, '/placement/1/app1', None, acl=mock.ANY),
            mock.call(mock.ANY, '/placement/2/app2', None, acl=mock.ANY),
        ]
    )

    servers[0].state = scheduler.State.down
    self.master.reschedule()

    # Both the placement and the scheduled node of app1 get deleted.
    treadmill.zkutils.ensure_deleted.assert_has_calls(
        [
            mock.call(mock.ANY, '/placement/1/app1'),
            mock.call(mock.ANY, '/scheduled/app1'),
        ]
    )
def reload_server(self, servername):
    """Reload individual server.

    A server that was never loaded is loaded as new. Otherwise its data is
    re-read from Zookeeper: if the node is missing or empty the server is
    removed; if the rebuilt server matches the current one (and has the
    same parent) only ``valid_until`` is refreshed; otherwise the server
    is removed and loaded again.

    :param servername: name of the server to reload.
    """
    _LOGGER.info('reloading server: %s', servername)
    if servername not in self.servers:
        # This server was never loaded.
        self.load_server(servername)
        return

    current_server = self.servers[servername]
    # Check if server is same
    try:
        data = zkutils.get(self.zkclient, z.path.server(servername))
        if not data:
            # The server is configured, but never reported its capacity.
            self.remove_server(servername)
            return

        # TODO: need better error handling.
        assert 'parent' in data
        assert data['parent'] in self.buckets

        # TODO: seems like this is cut/paste code from load_server.
        label = data.get('partition')
        if not label:
            label = admin.DEFAULT_PARTITION
        up_since = data.get('up_since', time.time())

        partition = self.cell.partitions[label]
        server = scheduler.Server(
            servername,
            resources(data),
            valid_until=partition.valid_until(up_since),
            label=label,
            traits=data.get('traits', 0))

        parent = self.buckets[data['parent']]
        # TODO: assume that bucket topology is constant, e.g.
        #       rack can never change building. If this does not
        #       hold, comparing parents is not enough, need to
        #       compare recursively all the way up.
        if (current_server.is_same(server) and
                current_server.parent == parent):
            # Nothing changed, no need to update anything.
            _LOGGER.info('server is same, keeping old.')
            current_server.valid_until = server.valid_until
        else:
            # Something changed - clear everything and re-register server
            # as new.
            _LOGGER.info('server modified, replacing.')
            self.remove_server(servername)
            self.load_server(servername)

    except kazoo.client.NoNodeError:
        self.remove_server(servername)
        # Logger.warn is a deprecated alias; use warning instead.
        _LOGGER.warning('Server node not found: %s', servername)
def _construct_cell():
    """Builds a feature-based test cell with two racks and four servers.

    :returns: the populated ``scheduler.Cell``.
    """
    cell = scheduler.Cell('top')

    rack1 = scheduler.Bucket('rack:rack1', features=[], level='rack')
    rack2 = scheduler.Bucket('rack:rack2', features=[], level='rack')
    for rack in (rack1, rack2):
        cell.add_node(rack)

    # (owning rack, server name, features, valid_until)
    specs = (
        (rack1, 'srv1', ['aaa', 'bbb'], 1000),
        (rack1, 'srv2', ['ccc'], 2000),
        (rack2, 'srv3', [], 3000),
        (rack2, 'srv4', [], 4000),
    )
    placed = [
        (rack, scheduler.Server(name, [10, 20, 30],
                                features=features, valid_until=expires))
        for rack, name, features, expires in specs
    ]
    for rack, server in placed:
        rack.add_node(server)

    tenant1 = scheduler.Allocation()
    tenant2 = scheduler.Allocation()
    tenant3 = scheduler.Allocation()
    alloc1 = scheduler.Allocation([10, 10, 10], rank=100, features=[])
    alloc2 = scheduler.Allocation([10, 10, 10], rank=100, features=['aaa'])

    # Resulting tree: t1 -> t3 -> a2 and t2 -> a1.
    cell.allocation.add_sub_alloc('t1', tenant1)
    cell.allocation.add_sub_alloc('t2', tenant2)
    tenant1.add_sub_alloc('t3', tenant3)
    tenant2.add_sub_alloc('a1', alloc1)
    tenant3.add_sub_alloc('a2', alloc2)

    return cell
def test_bucket_capacity(self):
    """Checks bucket free capacity as servers are added and removed.

    The assertions expect free capacity to be the per-dimension maximum
    over the servers in the bucket, mirrored on the parent bucket.
    """
    parent = scheduler.Bucket('top')
    bucket = scheduler.Bucket('b')
    parent.add_node(bucket)

    def expect_free(expected):
        """Asserts free capacity of the bucket, then of its parent."""
        for node in (bucket, parent):
            self.assertTrue(
                np.array_equal(node.free_capacity, np.array(expected))
            )

    bucket.add_node(scheduler.Server('n1', [10, 5], valid_until=500))
    expect_free([10., 5.])

    bucket.add_node(scheduler.Server('n2', [5, 10], valid_until=500))
    expect_free([10., 10.])

    # A smaller server does not change the maximum.
    bucket.add_node(scheduler.Server('n3', [3, 3], valid_until=500))
    expect_free([10., 10.])

    bucket.remove_node_by_name('n3')
    expect_free([10., 10.])

    bucket.remove_node_by_name('n1')
    expect_free([5., 10.])
def test_app_node_placement(self):
    """Checks bucket capacity adjustments as apps are put on servers."""
    parent = scheduler.Bucket('top')
    bucket = scheduler.Bucket('a_bucket')
    parent.add_node(bucket)

    first = scheduler.Server('n1', [10, 5], valid_until=500)
    bucket.add_node(first)
    second = scheduler.Server('n2', [10, 5], valid_until=500)
    bucket.add_node(second)

    def expect_free(expected):
        """Asserts free capacity of the bucket, then of its parent."""
        for node in (bucket, parent):
            self.assertTrue(
                np.array_equal(node.free_capacity, np.array(expected))
            )

    expect_free([10., 5.])
    self.assertTrue(
        np.array_equal(bucket.size(None), np.array([20., 10.]))
    )

    # Create 10 identical apps.
    apps = app_list(10, 'app', 50, [1, 2])

    self.assertTrue(first.put(apps[0]))

    # Capacity of buckets should not change, other node is intact.
    expect_free([10., 5.])

    # Once both servers hold apps, the bucket capacity shrinks.
    self.assertTrue(first.put(apps[1]))
    self.assertTrue(second.put(apps[2]))
    expect_free([9., 3.])
def test_emtpy(self):
    """Schedules a cell that contains a bucket without any servers."""
    cell = scheduler.Cell('top')

    # Bucket with no children.
    cell.add_node(scheduler.Bucket('empty', traits=0))

    # Bucket with a single server, populated before joining the cell.
    populated = scheduler.Bucket('bucket', traits=0)
    populated.add_node(
        scheduler.Server('a', [10, 10], traits=0, valid_until=500)
    )
    cell.add_node(populated)

    cell.schedule()
def create_server(self, servername, data):
    """Build a scheduler server from its stored description.

    :param servername: name given to the new server.
    :param data: mapping with the server attributes; 'partition',
        'up_since' and 'traits' are optional.
    :returns: the new ``scheduler.Server``.
    """
    # TODO: it will be better to have separate module for constants
    #       and avoid unnecessary cross imports.
    partition_label = data.get('partition') or admin.DEFAULT_PARTITION

    # Fall back to the current time when the server has not reported one.
    now = int(time.time())
    started = data.get('up_since', now)

    return scheduler.Server(
        servername,
        resources(data),
        up_since=started,
        label=partition_label,
        traits=data.get('traits', 0),
    )
def load_server(self, servername, readonly=False):
    """Load individual server.

    Reads the server data from Zookeeper, builds a ``scheduler.Server``
    and attaches it to its parent bucket. Servers without data or with an
    unknown parent bucket are skipped.

    :param servername: name of the server to load.
    :param readonly: when false, the server placement node is ensured to
        exist in Zookeeper; the flag is also passed on to
        ``adjust_server_state``.
    """
    try:
        data = zkutils.get(self.zkclient, z.path.server(servername))
        if not data:
            # The server is configured, but never reported its capacity.
            _LOGGER.info('No capacity detected: %s',
                         z.path.server(servername))
            return

        assert 'parent' in data
        parentname = data['parent']
        label = data.get('partition')
        if not label:
            # TODO: it will be better to have separate module for constants
            #       and avoid unnecessary cross imports.
            label = admin.DEFAULT_PARTITION
        up_since = data.get('up_since', int(time.time()))

        partition = self.cell.partitions[label]
        server = scheduler.Server(
            servername,
            resources(data),
            valid_until=partition.valid_until(up_since),
            label=label,
            traits=data.get('traits', 0))

        parent = self.buckets.get(parentname)
        if not parent:
            # Logger.warn is a deprecated alias; use warning instead.
            _LOGGER.warning('Server parent does not exist: %s/%s',
                            servername, parentname)
            return

        # Reuse the bucket fetched above instead of a second lookup.
        parent.add_node(server)
        self.servers[servername] = server
        assert server.parent == parent

        if not readonly:
            zkutils.ensure_exists(self.zkclient,
                                  z.path.placement(servername),
                                  acl=[_SERVERS_ACL])

        self.adjust_server_state(servername, readonly)

    except kazoo.client.NoNodeError:
        _LOGGER.warning('Server node not found: %s', servername)
def test_identity(self):
    """Tests scheduling apps with identity.

    Ten apps share an identity group; the number of apps that get an
    identity (and a placement) is expected to follow the configured
    group size.
    """
    cell = scheduler.Cell('top')
    for idx in range(0, 10):
        server = scheduler.Server(str(idx), [10, 10], traits=0,
                                  valid_until=time.time() + 1000)
        cell.add_node(server)

    cell.configure_identity_group('ident1', 3)
    apps = app_list(10, 'app', 50, [1, 1], identity_group='ident1')
    for app in apps:
        cell.add_app(cell.partitions[None].allocation, app)

    self.assertTrue(apps[0].acquire_identity())
    self.assertEqual(set([1, 2]), apps[0].identity_group_ref.available)
    self.assertEqual(set([1, 2]), apps[1].identity_group_ref.available)

    cell.schedule()

    self.assertEqual(apps[0].identity, 0)
    self.assertEqual(apps[1].identity, 1)
    self.assertEqual(apps[2].identity, 2)
    for idx in range(3, 10):
        # The second positional argument of assertIsNone is the failure
        # message, not an expected value, so it is omitted.
        self.assertIsNone(apps[idx].identity)

    # Removing app will release the identity, and it will be acquired by
    # next app in the group.
    cell.remove_app('app-2')
    cell.schedule()
    self.assertEqual(apps[3].identity, 2)

    # Increase identity group count to 5, expect 5 placed apps.
    cell.configure_identity_group('ident1', 5)
    cell.schedule()
    self.assertEqual(
        5,
        len([app for app in apps if app.server is not None])
    )

    # Shrinking the group back to 3 leaves 3 placed apps.
    cell.configure_identity_group('ident1', 3)
    cell.schedule()
    self.assertEqual(
        3,
        len([app for app in apps if app.server is not None])
    )
def load_server(self, servername):
    """Read one server from the backend and attach it to its bucket.

    Servers without data or with an unknown parent bucket are skipped;
    a missing backend object is logged as a warning.

    :param servername: name of the server to load.
    """
    try:
        server_data = self.backend.get(z.path.server(servername))
        if not server_data:
            # The server is configured, but never reported it's capacity.
            _LOGGER.info('No capacity detected: %s',
                         z.path.server(servername))
            return

        assert 'parent' in server_data
        bucket_name = server_data['parent']

        # TODO: it will be better to have separate module for constants
        #       and avoid unnecessary cross imports.
        partition_label = (
            server_data.get('partition') or admin.DEFAULT_PARTITION
        )
        now = int(time.time())
        started = server_data.get('up_since', now)

        new_server = scheduler.Server(
            servername,
            resources(server_data),
            up_since=started,
            label=partition_label,
            traits=server_data.get('traits', 0),
        )

        bucket = self.buckets.get(bucket_name)
        if not bucket:
            _LOGGER.warning('Server parent does not exist: %s/%s',
                            servername, bucket_name)
            return

        self.buckets[bucket_name].add_node(new_server)
        self.servers[servername] = new_server
        assert new_server.parent == self.buckets[bucket_name]

        self.backend.ensure_exists(z.path.placement(servername))
        self.adjust_server_state(servername)

    except be.ObjectNotFoundError:
        _LOGGER.warning('Server node not found: %s', servername)
def prepareData(nodes_count, app_count, affinity):
    """Build a flat cell with servers and randomly prioritized apps.

    :param nodes_count: number of servers added directly to the cell.
    :param app_count: number of applications to create.
    :param affinity: callable that maps an application index to the
        affinity passed to ``scheduler.Application``.
    :returns: the populated ``scheduler.Cell``.
    """
    scheduler.DIMENSION_COUNT = 3

    cell = scheduler.Cell("local", labels=set([None]))

    for node_idx in range(nodes_count):
        capacity = resources({
            "memory": "2G",
            "disk": "20G",
            "cpu": "90%"
        })
        cell.add_node(
            scheduler.Server('node' + str(node_idx), capacity,
                             time.time() * 2)
        )

    for app_idx in range(app_count):
        priority = random.randint(0, 5)
        requested = resources({
            "memory": "1G",
            "disk": "10G",
            "cpu": "40%"
        })
        app_name = 'app_.%s' % (app_idx)
        cell.partitions[None].allocation.add(
            scheduler.Application(app_name, priority, requested,
                                  affinity=affinity(app_idx))
        )

    return cell
def create_server(self, servername, data):
    """Build a scheduler server from its stored description.

    Trait names found in ``data`` are encoded through ``traits.encode``
    and the resulting code table is stored back on ``self.trait_codes``.

    :param servername: name given to the new server.
    :param data: mapping with the server attributes; 'partition',
        'up_since' and 'traits' are optional.
    :returns: the new ``scheduler.Server``.
    """
    # TODO: it will be better to have separate module for constants
    #       and avoid unnecessary cross imports.
    partition_label = data.get('partition') or _DEFAULT_PARTITION

    now = int(time.time())
    started = data.get('up_since', now)

    # add_new=True is passed so that unknown trait names can be encoded.
    encoded_traits, updated_codes = traits.encode(
        self.trait_codes, data.get('traits', []), add_new=True
    )
    self.trait_codes = updated_codes

    return scheduler.Server(
        servername,
        resources(data),
        up_since=started,
        label=partition_label,
        traits=encoded_traits,
    )